From df7b6759d3e79624f3d23e020257478d0774384c Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 29 May 2026 04:37:09 +0530 Subject: [PATCH] Extract SPOG org-id from cluster httpPath for non-Thrift requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For all-purpose-compute Thrift connections on SPOG (custom-URL) hosts, httpPath is /sql/protocolv1/o/<workspace-id>/<cluster-id> and the workspace ID is encoded in the path itself. PoPP routes the Thrift request correctly off the /o/<workspace-id>/ segment, so the connection succeeds without an explicit ?o= query parameter. Other clients on the same driver (telemetry pushes to /telemetry-ext, feature-flag fetches, ...) hit different paths that don't carry the workspace ID. Previously extractSpogHeaders only looked at ?o= in httpPath, so the x-databricks-org-id header was never set for cluster URLs without ?o=. On SPOG hosts PoPP then had no workspace context for these requests and redirected them to /login, silently dropping telemetry. Extend extractSpogHeaders to also extract the workspace ID from the cluster path segment via clusterPathOrgIDPattern as a fallback when ?o= is absent. Priority order is preserved: ?o= query param wins over the path-segment match. Adds four new test cases — cluster path without ?o=, leading-slash variant, ?o= wins precedence, and a warehouse-path regression guard so the new regex does not match warehouse paths. 
Signed-off-by: Madhavendra Rathore --- connector.go | 78 +++++++++++++++++++++++++----------------- connector_spog_test.go | 25 ++++++++++++++ 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/connector.go b/connector.go index 467addd..4f15d21 100644 --- a/connector.go +++ b/connector.go @@ -7,6 +7,7 @@ import ( "fmt" "net/http" "net/url" + "regexp" "strings" "time" @@ -136,44 +137,59 @@ func NewConnector(options ...ConnOption) (driver.Connector, error) { return &connector{cfg: cfg, client: client}, nil } -// extractSpogHeaders extracts ?o= from httpPath and returns -// an x-databricks-org-id header for SPOG routing. +// clusterPathOrgIDPattern matches the workspace ID inside an all-purpose-compute +// Thrift path of the form [/]sql/protocolv1/o//[/...]. +var clusterPathOrgIDPattern = regexp.MustCompile(`(?:^|/)sql/protocolv1/o/(\d+)/[^/?]+`) + +// extractSpogHeaders inspects httpPath for the workspace ID and returns it as an +// x-databricks-org-id header dict for SPOG routing. +// +// Two sources are checked, in priority order: +// 1. ?o= query parameter (warehouse paths on SPOG typically use +// this form, e.g. /sql/1.0/warehouses/?o=). +// 2. /sql/protocolv1/o// path segment (all-purpose +// cluster paths embed the workspace in the path itself). // -// On SPOG (Custom URL) workspaces, httpPath is of the form -// /sql/1.0/warehouses/?o=. The ?o= parameter keeps Thrift -// requests routed to the correct workspace via the URL itself, but other -// endpoints (telemetry, feature flags) run on separate hosts and need the -// x-databricks-org-id header. This function extracts ?o= from httpPath once -// and returns it so those paths can inject it as an HTTP header. +// Thrift requests are routed by the URL itself, but other endpoints +// (telemetry, feature flags) run on separate paths that don't carry the +// workspace ID — without this header, PoPP on SPOG hosts can't determine the +// workspace and redirects the request to /login. 
// -// Returns nil if: -// - httpPath has no query string ("?"), or -// - the query string is malformed and can't be parsed, or -// - the ?o= parameter is missing or empty. +// Returns nil if no workspace ID can be determined. func extractSpogHeaders(httpPath string) map[string]string { - if !strings.Contains(httpPath, "?") { + if httpPath == "" { return nil } - // Parse query string from httpPath - parts := strings.SplitN(httpPath, "?", 2) - params, err := url.ParseQuery(parts[1]) - if err != nil { - logger.Debug().Msgf( - "SPOG header extraction: malformed query string in httpPath, skipping org-id extraction: %s", - err) - return nil + + // 1) ?o= query parameter. + if strings.Contains(httpPath, "?") { + parts := strings.SplitN(httpPath, "?", 2) + params, err := url.ParseQuery(parts[1]) + if err != nil { + logger.Debug().Msgf( + "SPOG header extraction: malformed query string in httpPath, falling back to path inspection: %s", + err) + } else if orgID := params.Get("o"); orgID != "" { + logger.Debug().Msgf( + "SPOG header extraction: injecting x-databricks-org-id=%s (extracted from ?o= in httpPath)", + orgID) + return map[string]string{"x-databricks-org-id": orgID} + } } - orgID := params.Get("o") - if orgID == "" { - logger.Debug().Msg( - "SPOG header extraction: httpPath has query string but no ?o= param, " + - "skipping x-databricks-org-id injection") - return nil + + // 2) /sql/protocolv1/o// path segment. 
+ if match := clusterPathOrgIDPattern.FindStringSubmatch(httpPath); match != nil { + orgID := match[1] + logger.Debug().Msgf( + "SPOG header extraction: injecting x-databricks-org-id=%s (extracted from cluster path segment)", + orgID) + return map[string]string{"x-databricks-org-id": orgID} } - logger.Debug().Msgf( - "SPOG header extraction: injecting x-databricks-org-id=%s (extracted from ?o= in httpPath)", - orgID) - return map[string]string{"x-databricks-org-id": orgID} + + logger.Debug().Msg( + "SPOG header extraction: no workspace ID found in httpPath, " + + "skipping x-databricks-org-id injection") + return nil } // withSpogHeaders returns a new *http.Client that reuses the transport of the diff --git a/connector_spog_test.go b/connector_spog_test.go index 69273ee..04b695e 100644 --- a/connector_spog_test.go +++ b/connector_spog_test.go @@ -57,6 +57,31 @@ func TestExtractSpogHeaders(t *testing.T) { httpPath: "/sql/1.0/warehouses/abc?", want: nil, }, + { + // All-purpose cluster paths embed the workspace ID in /o//. + // Without ?o=, the driver must still extract it so non-Thrift endpoints + // (telemetry, feature flags) get x-databricks-org-id on SPOG hosts. + name: "cluster path without ?o= extracts org id from path segment", + httpPath: "sql/protocolv1/o/6051921418418893/0528-220959-uzmcn1qt", + want: map[string]string{"x-databricks-org-id": "6051921418418893"}, + }, + { + name: "cluster path with leading slash also extracts", + httpPath: "/sql/protocolv1/o/6051921418418893/0528-220959-uzmcn1qt", + want: map[string]string{"x-databricks-org-id": "6051921418418893"}, + }, + { + name: "?o= query param wins over cluster path segment", + httpPath: "sql/protocolv1/o/111/0528-220959-uzmcn1qt?o=222", + want: map[string]string{"x-databricks-org-id": "222"}, + }, + { + // Regression guard: the new cluster-path regex must not match + // warehouse paths (which never embed the workspace ID). 
+ name: "warehouse path without ?o= still returns nil", + httpPath: "/sql/1.0/warehouses/abc123", + want: nil, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) {