diff --git a/Dockerfile b/Dockerfile index 3c37dff7..68b91326 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,16 +12,17 @@ ARG BUILD_TAGS="" ARG TARGETARCH ARG DUCKDB_EXTENSION_VERSION=1.5.2 ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix +ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.1 ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org RUN CGO_ENABLED=1 go build -tags "${BUILD_TAGS}" -ldflags "-X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" -o duckgres . RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \ && curl -fsSL "https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \ -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \ - && for ext in ducklake json; do \ - curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \ - | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \ - done \ + && curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \ + -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/ducklake.duckdb_extension" \ + && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \ + | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \ && curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" diff --git a/Dockerfile.worker 
b/Dockerfile.worker index c7d6d145..5a39e90b 100644 --- a/Dockerfile.worker +++ b/Dockerfile.worker @@ -50,6 +50,7 @@ RUN go mod download ARG DUCKDB_EXTENSION_VERSION=1.5.2 ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix +ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.1 ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org @@ -78,10 +79,10 @@ RUN if [ -n "$DUCKDB_BINDINGS_VERSION" ]; then \ RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \ && curl -fsSL "https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \ -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \ - && for ext in ducklake json; do \ - curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \ - | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \ - done \ + && curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \ + -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/ducklake.duckdb_extension" \ + && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \ + | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \ && curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" diff --git a/server/server.go b/server/server.go index 5b237396..650e69cb 100644 --- a/server/server.go +++ b/server/server.go @@ -983,7 +983,11 @@ func 
// shouldRefreshBundledExtension reports whether the bundled extension file at
// srcPath is one of the extensions that must be refreshed.
//
// Only the final path element (filepath.Base) is compared, so the directory
// part of srcPath is irrelevant. The match is exact and case-sensitive.
//
// NOTE(review): "refresh" presumably means the seeding code overwrites an
// already-present copy in the destination — confirm against the caller.
func shouldRefreshBundledExtension(srcPath string) bool {
	switch filepath.Base(srcPath) {
	case "postgres_scanner.duckdb_extension", "ducklake.duckdb_extension":
		return true
	default:
		return false
	}
}
"+ + "If the bundled fork was upgraded, update expectedDucklakeExtensionVersion alongside DUCKLAKE_EXTENSION_TAG.", + version, expectedDucklakeExtensionVersion) + } +} + func TestK8sWorkerCrashRecovery(t *testing.T) { // Run a query to ensure a worker exists if err := retryQueryWithReconnect("SELECT 1", 30*time.Second); err != nil { @@ -739,6 +769,12 @@ func retryScanIntWithReconnect(query string, timeout time.Duration, dest *int) e }) } +func retryScanStringWithReconnect(query string, timeout time.Duration, dest *string) error { + return retryDBOperationWithReconnectAs("postgres", "postgres", timeout, fmt.Sprintf("query %q", query), func(ctx context.Context, db *sql.DB) error { + return db.QueryRowContext(ctx, query).Scan(dest) + }) +} + func retryQueryWithReconnectAs(username, password, query string, timeout time.Duration) error { return retryDBOperationWithReconnectAs(username, password, timeout, fmt.Sprintf("query %q", query), func(ctx context.Context, db *sql.DB) error { var result int