From 5d36691421047052fff671328ea5d9bbcc3f0b6f Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Sat, 9 May 2026 10:05:03 -0400 Subject: [PATCH 1/3] feat: bundle PostHog ducklake fork (v1.0-posthog.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switches the ducklake extension shipped in the duckgres and duckgres-worker images from the upstream extensions.duckdb.org build to PostHog's fork at https://github.com/PostHog/ducklake/releases. Mirrors the existing httpfs override pattern. Also adds ducklake.duckdb_extension to shouldRefreshBundledExtension so the cached copy on a long-lived data-dir PVC is replaced when the image ships a newer fork build — same rationale as postgres_scanner. Co-Authored-By: Claude Opus 4.7 --- Dockerfile | 9 ++++---- Dockerfile.worker | 9 ++++---- server/bundled_extensions_test.go | 35 +++++++++++++++++++++++++++++++ server/server.go | 6 +++++- 4 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3c37dff7..68b91326 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,16 +12,17 @@ ARG BUILD_TAGS="" ARG TARGETARCH ARG DUCKDB_EXTENSION_VERSION=1.5.2 ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix +ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.1 ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org RUN CGO_ENABLED=1 go build -tags "${BUILD_TAGS}" -ldflags "-X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" -o duckgres . 
RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \ && curl -fsSL "https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \ -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \ - && for ext in ducklake json; do \ - curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \ - | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \ - done \ + && curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \ + -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/ducklake.duckdb_extension" \ + && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \ + | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \ && curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" diff --git a/Dockerfile.worker b/Dockerfile.worker index c7d6d145..5a39e90b 100644 --- a/Dockerfile.worker +++ b/Dockerfile.worker @@ -50,6 +50,7 @@ RUN go mod download ARG DUCKDB_EXTENSION_VERSION=1.5.2 ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix +ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.1 ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org @@ -78,10 +79,10 @@ RUN if [ -n "$DUCKDB_BINDINGS_VERSION" ]; then \ RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \ && curl -fsSL 
"https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \ -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \ - && for ext in ducklake json; do \ - curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \ - | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \ - done \ + && curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \ + -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/ducklake.duckdb_extension" \ + && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \ + | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \ && curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index 071f7f89..b9f4d7a7 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -114,6 +114,41 @@ func TestSeedBundledExtensionsReplacesExistingFilesWithUpdatedContents(t *testin } } +func TestSeedBundledExtensionsRefreshesDucklake(t *testing.T) { + srcRoot := t.TempDir() + dstRoot := t.TempDir() + + srcDir := filepath.Join(srcRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(srcDir, 0o755); err != nil { + t.Fatalf("mkdir src: %v", err) + } + srcExt := filepath.Join(srcDir, "ducklake.duckdb_extension") + if err := os.WriteFile(srcExt, []byte("posthog-fork"), 0o644); err != nil { + 
t.Fatalf("write src extension: %v", err) + } + + dstDir := filepath.Join(dstRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(dstDir, 0o755); err != nil { + t.Fatalf("mkdir dst: %v", err) + } + dstExt := filepath.Join(dstDir, "ducklake.duckdb_extension") + if err := os.WriteFile(dstExt, []byte("upstream"), 0o644); err != nil { + t.Fatalf("write dst extension: %v", err) + } + + if err := seedBundledExtensions(srcRoot, dstRoot); err != nil { + t.Fatalf("seedBundledExtensions: %v", err) + } + + got, err := os.ReadFile(dstExt) + if err != nil { + t.Fatalf("read dst extension: %v", err) + } + if string(got) != "posthog-fork" { + t.Fatalf("expected cached ducklake to be replaced by bundled fork, got %q", string(got)) + } +} + func TestSeedBundledExtensionsPreservesNonTargetedChangedFiles(t *testing.T) { srcRoot := t.TempDir() dstRoot := t.TempDir() diff --git a/server/server.go b/server/server.go index 5b237396..650e69cb 100644 --- a/server/server.go +++ b/server/server.go @@ -983,7 +983,11 @@ func seedBundledExtensions(srcRoot, dstRoot string) error { } func shouldRefreshBundledExtension(srcPath string) bool { - return filepath.Base(srcPath) == "postgres_scanner.duckdb_extension" + switch filepath.Base(srcPath) { + case "postgres_scanner.duckdb_extension", "ducklake.duckdb_extension": + return true + } + return false } func copyFile(srcPath, dstPath string, mode os.FileMode) error { From 256d74f3a03e2a124c99644afb90711771531617 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Sat, 9 May 2026 10:11:31 -0400 Subject: [PATCH 2/3] test: drop redundant ducklake refresh test Already covered by TestSeedBundledExtensionsReplacesExistingFilesWithUpdatedContents exercising the same code path via postgres_scanner. 
Co-Authored-By: Claude Opus 4.7 --- server/bundled_extensions_test.go | 35 ------------------------------- 1 file changed, 35 deletions(-) diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index b9f4d7a7..071f7f89 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -114,41 +114,6 @@ func TestSeedBundledExtensionsReplacesExistingFilesWithUpdatedContents(t *testin } } -func TestSeedBundledExtensionsRefreshesDucklake(t *testing.T) { - srcRoot := t.TempDir() - dstRoot := t.TempDir() - - srcDir := filepath.Join(srcRoot, "v1.5.2", "linux_arm64") - if err := os.MkdirAll(srcDir, 0o755); err != nil { - t.Fatalf("mkdir src: %v", err) - } - srcExt := filepath.Join(srcDir, "ducklake.duckdb_extension") - if err := os.WriteFile(srcExt, []byte("posthog-fork"), 0o644); err != nil { - t.Fatalf("write src extension: %v", err) - } - - dstDir := filepath.Join(dstRoot, "v1.5.2", "linux_arm64") - if err := os.MkdirAll(dstDir, 0o755); err != nil { - t.Fatalf("mkdir dst: %v", err) - } - dstExt := filepath.Join(dstDir, "ducklake.duckdb_extension") - if err := os.WriteFile(dstExt, []byte("upstream"), 0o644); err != nil { - t.Fatalf("write dst extension: %v", err) - } - - if err := seedBundledExtensions(srcRoot, dstRoot); err != nil { - t.Fatalf("seedBundledExtensions: %v", err) - } - - got, err := os.ReadFile(dstExt) - if err != nil { - t.Fatalf("read dst extension: %v", err) - } - if string(got) != "posthog-fork" { - t.Fatalf("expected cached ducklake to be replaced by bundled fork, got %q", string(got)) - } -} - func TestSeedBundledExtensionsPreservesNonTargetedChangedFiles(t *testing.T) { srcRoot := t.TempDir() dstRoot := t.TempDir() From bf791f057566cf6d9e82d284fc4d0a879fff491c Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Sat, 9 May 2026 10:35:27 -0400 Subject: [PATCH 3/3] test(k8s): assert worker loads PostHog ducklake fork After bundling the v1.0-posthog.1 fork in Dockerfile.worker, add a kind-cluster
integration test that queries duckdb_extensions().extension_version on a worker and asserts it matches the fork's commit short SHA (90dc1f24) rather than the upstream build's. The expected value lives next to the test with a comment pointing at DUCKLAKE_EXTENSION_TAG in the Dockerfiles so the two move together. Co-Authored-By: Claude Opus 4.7 --- tests/k8s/k8s_test.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/k8s/k8s_test.go b/tests/k8s/k8s_test.go index 0e9ef2b6..576ab164 100644 --- a/tests/k8s/k8s_test.go +++ b/tests/k8s/k8s_test.go @@ -188,6 +188,36 @@ func TestK8sSharedWarmWorkerActivation(t *testing.T) { } } +// expectedDucklakeExtensionVersion is the short SHA of the commit +// PostHog/ducklake's v1.0-posthog.1 tag points at. DuckDB's +// EXT_VERSION_DUCKLAKE macro embeds this string at build time and exposes +// it via duckdb_extensions().extension_version. Bump this in lock-step +// with DUCKLAKE_EXTENSION_TAG in Dockerfile / Dockerfile.worker. +const expectedDucklakeExtensionVersion = "90dc1f24" + +// TestK8sDucklakeExtensionIsBundledFork asserts the worker pods load the +// PostHog ducklake fork bundled by Dockerfile.worker, not the upstream +// build that DuckDB would otherwise fetch from extensions.duckdb.org. +// The version string is the short SHA of the fork's tagged commit. +func TestK8sDucklakeExtensionIsBundledFork(t *testing.T) { + if err := retryQueryWithReconnect("SELECT 1", 30*time.Second); err != nil { + t.Fatalf("warm-up query failed: %v", err) + } + + var version string + if err := retryScanStringWithReconnect( + "SELECT extension_version FROM duckdb_extensions() WHERE extension_name = 'ducklake' AND loaded", + 60*time.Second, &version, + ); err != nil { + t.Fatalf("query ducklake extension_version: %v", err) + } + if version != expectedDucklakeExtensionVersion { + t.Fatalf("ducklake extension_version = %q, want %q (PostHog fork v1.0-posthog.1). 
"+ + "If the bundled fork was upgraded, update expectedDucklakeExtensionVersion alongside DUCKLAKE_EXTENSION_TAG.", + version, expectedDucklakeExtensionVersion) + } +} + func TestK8sWorkerCrashRecovery(t *testing.T) { // Run a query to ensure a worker exists if err := retryQueryWithReconnect("SELECT 1", 30*time.Second); err != nil { @@ -739,6 +769,12 @@ func retryScanIntWithReconnect(query string, timeout time.Duration, dest *int) e }) } +func retryScanStringWithReconnect(query string, timeout time.Duration, dest *string) error { + return retryDBOperationWithReconnectAs("postgres", "postgres", timeout, fmt.Sprintf("query %q", query), func(ctx context.Context, db *sql.DB) error { + return db.QueryRowContext(ctx, query).Scan(dest) + }) +} + func retryQueryWithReconnectAs(username, password, query string, timeout time.Duration) error { return retryDBOperationWithReconnectAs(username, password, timeout, fmt.Sprintf("query %q", query), func(ctx context.Context, db *sql.DB) error { var result int