From f624951b3da7a805f3802b057cf30d8241c648a3 Mon Sep 17 00:00:00 2001 From: Jeff Levin Date: Fri, 15 May 2026 14:16:24 -0800 Subject: [PATCH] add deny flags for privileged, cap-add, and host namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Body-inspecting bind mount validation already rejects unsafe `HostConfig.Binds` sources, but the same code path silently accepts other `HostConfig` fields that punch through container isolation: `Privileged`, `CapAdd`, and the host-namespace modes (`NetworkMode`/`PidMode`/`IpcMode`/`UTSMode`/`UsernsMode`). This adds three opt-in deny flags that extend the existing body inspection in `POST /containers/create` and `POST /containers/{id}/update`. Each defaults to off, so upgraders see no behavior change. - `-denyprivileged` / `SP_DENYPRIVILEGED` — rejects `Privileged=true` - `-denycapadd` / `SP_DENYCAPADD` — rejects non-empty `CapAdd` - `-denyhostnamespaces` / `SP_DENYHOSTNAMESPACES` — rejects `host` (or `host:*`) values in any of the namespace mode fields The deny flags are also exposed as per-container labels with the new `socket-proxy.deny.` prefix (e.g. `socket-proxy.deny.privileged=true`) to match the existing per-container allowlist pattern. Swarm services do not surface these `HostConfig` fields in their API, so the new flags have no effect on service create/update; only the bind mount filter applies there as before. 
--- README.md | 32 +++- cmd/socket-proxy/bindmount.go | 115 ++++++++++--- cmd/socket-proxy/bindmount_test.go | 227 +++++++++++++++++++++++++- cmd/socket-proxy/handlehttprequest.go | 11 +- internal/config/config.go | 112 ++++++++++--- internal/config/config_test.go | 166 ++++++++++++++++++- 6 files changed, 619 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 33811ff..6c74977 100644 --- a/README.md +++ b/README.md @@ -123,13 +123,37 @@ Bind mount restrictions are applied to relevant Docker API endpoints and work wi **Note**: This feature only restricts bind mounts. Other mount types (volumes, tmpfs, etc.) are not affected by this restriction. +#### Restricting dangerous host config fields + +In addition to bind mount source allowlisting, socket-proxy can reject container creation requests that set `HostConfig` fields that break out of container isolation. Each check is opt-in via its own flag and defaults to off, so existing deployments are unaffected on upgrade. + +| Flag | Env variable | Rejects when | +| --- | --- | --- | +| `-denyprivileged` | `SP_DENYPRIVILEGED` | `HostConfig.Privileged == true` | +| `-denycapadd` | `SP_DENYCAPADD` | `HostConfig.CapAdd` is non-empty (any added kernel capability) | +| `-denyhostnamespaces` | `SP_DENYHOSTNAMESPACES` | `HostConfig.NetworkMode` / `PidMode` / `IpcMode` / `UTSMode` / `UsernsMode` equals `host` (or a `host:*` form for modes that accept it) | + +These checks apply to `POST /containers/create` and `POST /containers/{id}/update`. Swarm services do not surface these `HostConfig` fields in their API, so these flags have no effect there; only the bind mount filter applies to service create/update. 
+ +Example combining bind mount restrictions and host config deny flags: + +```text +socket-proxy \ + -allowbindmountfrom=/srv/projects \ + -denyprivileged \ + -denycapadd \ + -denyhostnamespaces \ + -allowGET='/v[0-9.]+/(_ping|version|containers/.*|images/.*)' \ + -allowPOST='/v[0-9.]+/(containers/create|containers/.*/(start|stop|restart|kill))' +``` + #### Setting up per-container allowlists Allowlists for both requests and bind mount restrictions can be specified for particular containers. To do this: 1. Set `-proxycontainername` or the environment variable `SP_PROXYCONTAINERNAME` to the name of the socket proxy container. 2. Make sure that each container that will use the socket proxy is in a Docker network that the socket proxy container is also in. -3. Use the same regex syntax for request allowlists and for bind mount restrictions that were discussed earlier, but for labels on each container that will use the socket proxy. Each label name will have the prefix of `socket-proxy.allow.`, with `socket-proxy.allow.bindmountfrom` for bind mount restrictions. For example: +3. Use the same regex syntax for request allowlists and for bind mount restrictions that were discussed earlier, but for labels on each container that will use the socket proxy. Each label name will have the prefix of `socket-proxy.allow.`, with `socket-proxy.allow.bindmountfrom` for bind mount restrictions. Host config deny flags use the `socket-proxy.deny.` prefix and take a boolean value as parsed by Go's `strconv.ParseBool` (e.g. `true`, `1`, `false`, `0`); any other value, such as `yes`, is rejected as an invalid label. 
For example: ```yaml services: @@ -142,6 +166,9 @@ services: - 'socket-proxy.allow.get=.*' # allow all GET requests to socket-proxy - 'socket-proxy.allow.head=/version' # HEAD `/version` requests to socket-proxy - 'socket-proxy.allow.head.1=/exec' # another HEAD `exec` requests to socket-proxy + - 'socket-proxy.deny.privileged=true' # reject container creation with HostConfig.Privileged=true + - 'socket-proxy.deny.capadd=true' # reject container creation that adds kernel capabilities + - 'socket-proxy.deny.hostnamespaces=true' # reject host NetworkMode/PidMode/IpcMode/UTSMode/UsernsMode ``` When this is used, it is not necessary to specify the container in `-allowfrom` as the presence of the allowlist labels will grant corresponding access. @@ -249,6 +276,9 @@ socket-proxy can be configured via command-line parameters or via environment va | `-proxysocketendpoint` | `SP_PROXYSOCKETENDPOINT` | (not set) | Proxy to the given unix socket instead of a TCP port | | `-proxysocketendpointfilemode` | `SP_PROXYSOCKETENDPOINTFILEMODE` | `0600` | Explicitly set the file mode for the filtered unix socket endpoint (only useful with `-proxysocketendpoint`) | | `-proxycontainername` | `SP_PROXYCONTAINERNAME` | (not set) | Provides the name of the socket proxy container to enable per-container allowlists specified by Docker container labels (not available with `-proxysocketendpoint`) | +| `-denyprivileged` | `SP_DENYPRIVILEGED` | (not set/false) | If set, reject container creation/update requests that set `HostConfig.Privileged=true`. Defaults to off. | +| `-denycapadd` | `SP_DENYCAPADD` | (not set/false) | If set, reject container creation/update requests that add kernel capabilities via `HostConfig.CapAdd`. Defaults to off. | +| `-denyhostnamespaces` | `SP_DENYHOSTNAMESPACES` | (not set/false) | If set, reject container creation/update requests that request host namespaces via `HostConfig.NetworkMode` / `PidMode` / `IpcMode` / `UTSMode` / `UsernsMode` equal to `host`. 
Defaults to off. | ### Changelog diff --git a/cmd/socket-proxy/bindmount.go b/cmd/socket-proxy/bindmount.go index 38671c3..c4c5381 100644 --- a/cmd/socket-proxy/bindmount.go +++ b/cmd/socket-proxy/bindmount.go @@ -52,8 +52,15 @@ type ( } // containerHostConfig is the subset of github.com/docker/docker/api/types/container.HostConfig. containerHostConfig struct { - Binds []string // List of volume bindings for this container. - Mounts []mountMount `json:",omitempty"` // Mounts specs used by the container. + Binds []string // List of volume bindings for this container. + Mounts []mountMount `json:",omitempty"` // Mounts specs used by the container. + Privileged bool `json:",omitempty"` // Is the container in privileged mode. + CapAdd []string `json:",omitempty"` // List of kernel capabilities to add to the container. + NetworkMode string `json:",omitempty"` // Network namespace ("host" gives host networking). + PidMode string `json:",omitempty"` // PID namespace ("host" gives host PID). + IpcMode string `json:",omitempty"` // IPC namespace ("host" gives host IPC). + UTSMode string `json:",omitempty"` // UTS namespace ("host" gives host UTS). + UsernsMode string `json:",omitempty"` // User namespace mode ("host" disables user namespace remapping). } // swarmServiceSpec is the subset of github.com/docker/docker/api/types/swarm.ServiceSpec. swarmServiceSpec struct { @@ -78,10 +85,25 @@ type ( } ) -// checkBindMountRestrictions checks if bind mounts in the request are allowed. -func checkBindMountRestrictions(allowedBindMounts []string, r *http.Request) error { - // Only check if bind mount restrictions are configured - if len(allowedBindMounts) == 0 { +// hostConfigPolicy defines optional host config security restrictions applied +// alongside bind mount source allowlisting. The zero value means no extra +// restrictions are enforced beyond bind mount validation. 
+type hostConfigPolicy struct { + DenyPrivileged bool // reject HostConfig.Privileged == true + DenyCapAdd bool // reject non-empty HostConfig.CapAdd + DenyHostNamespaces bool // reject host value for NetworkMode/PidMode/IpcMode/UTSMode/UsernsMode +} + +// isZero reports whether the policy enforces no restrictions. +func (p hostConfigPolicy) isZero() bool { + return !p.DenyPrivileged && !p.DenyCapAdd && !p.DenyHostNamespaces +} + +// checkHostConfigRestrictions checks bind mount sources and host config +// security restrictions for relevant container/service API requests. +func checkHostConfigRestrictions(allowedBindMounts []string, policy hostConfigPolicy, r *http.Request) error { + // Only check if restrictions are configured + if len(allowedBindMounts) == 0 && policy.isZero() { return nil } @@ -89,28 +111,28 @@ func checkBindMountRestrictions(allowedBindMounts []string, r *http.Request) err return nil } - // Check different API endpoints that can use bind mounts + // Check different API endpoints that can use bind mounts or set host config pathParts := strings.Split(r.URL.Path, "/") switch { case len(pathParts) >= 4 && pathParts[2] == "containers" && pathParts[3] == "create": // Container creation: /vX.xx/containers/create - return checkContainer(allowedBindMounts, r) + return checkContainer(allowedBindMounts, policy, r) case len(pathParts) >= 5 && pathParts[2] == "containers" && pathParts[4] == "update": // Container update: /vX.xx/containers/{id}/update - return checkContainer(allowedBindMounts, r) + return checkContainer(allowedBindMounts, policy, r) case len(pathParts) >= 4 && pathParts[2] == "services" && pathParts[3] == "create": // Service creation: /vX.xx/services/create - return checkService(allowedBindMounts, r) + return checkService(allowedBindMounts, policy, r) case len(pathParts) >= 5 && pathParts[2] == "services" && pathParts[4] == "update": // Service update: /vX.xx/services/{id}/update - return checkService(allowedBindMounts, r) + return 
checkService(allowedBindMounts, policy, r) default: return nil } } -// checkContainer checks bind mounts in container creation requests. -func checkContainer(allowedBindMounts []string, r *http.Request) error { +// checkContainer checks bind mounts and host config in container creation/update requests. +func checkContainer(allowedBindMounts []string, policy hostConfigPolicy, r *http.Request) error { body, err := readAndRestoreBody(r) if err != nil { return err @@ -122,11 +144,13 @@ func checkContainer(allowedBindMounts []string, r *http.Request) error { return nil // Don't block if we can't parse. } - return checkHostConfigBindMounts(allowedBindMounts, req.HostConfig) + return checkHostConfig(allowedBindMounts, policy, req.HostConfig) } -// checkService checks bind mounts in service creation requests. -func checkService(allowedBindMounts []string, r *http.Request) error { +// checkService checks bind mounts and host config in service creation/update requests. +// Swarm services only allow specifying Mounts (not Binds) and do not expose the +// host-namespace/privileged fields, so only the Mounts list is forwarded for validation. +func checkService(allowedBindMounts []string, policy hostConfigPolicy, r *http.Request) error { body, err := readAndRestoreBody(r) if err != nil { return err @@ -141,20 +165,38 @@ func checkService(allowedBindMounts []string, r *http.Request) error { if req.TaskTemplate.ContainerSpec == nil { return nil // No container spec, nothing to check. } - return checkHostConfigBindMounts( + return checkHostConfig( allowedBindMounts, + policy, &containerHostConfig{ Mounts: req.TaskTemplate.ContainerSpec.Mounts, }, ) } -// checkHostConfigBindMounts checks bind mounts in HostConfig. -func checkHostConfigBindMounts(allowedBindMounts []string, hostConfig *containerHostConfig) error { +// checkHostConfig validates bind mount sources and host config security restrictions. 
+func checkHostConfig(allowedBindMounts []string, policy hostConfigPolicy, hostConfig *containerHostConfig) error { if hostConfig == nil { return nil // No HostConfig, nothing to check } + if len(allowedBindMounts) > 0 { + if err := checkHostConfigBindMounts(allowedBindMounts, hostConfig); err != nil { + return err + } + } + + if !policy.isZero() { + if err := checkHostConfigSecurity(policy, hostConfig); err != nil { + return err + } + } + + return nil +} + +// checkHostConfigBindMounts checks bind mounts in HostConfig. +func checkHostConfigBindMounts(allowedBindMounts []string, hostConfig *containerHostConfig) error { // Check legacy Binds field for _, bind := range hostConfig.Binds { if err := validateBindMount(allowedBindMounts, bind); err != nil { @@ -174,6 +216,41 @@ func checkHostConfigBindMounts(allowedBindMounts []string, hostConfig *container return nil } +// checkHostConfigSecurity rejects host config fields that punch through container isolation. +// Each check is gated by its corresponding policy flag; with the zero policy this function is a no-op. 
+func checkHostConfigSecurity(policy hostConfigPolicy, hostConfig *containerHostConfig) error { + if policy.DenyPrivileged && hostConfig.Privileged { + return fmt.Errorf("privileged containers not allowed") + } + if policy.DenyCapAdd && len(hostConfig.CapAdd) > 0 { + return fmt.Errorf("adding kernel capabilities not allowed: %v", hostConfig.CapAdd) + } + if policy.DenyHostNamespaces { + if mode := hostConfig.NetworkMode; isHostNamespace(mode) { + return fmt.Errorf("host network mode not allowed: %s", mode) + } + if mode := hostConfig.PidMode; isHostNamespace(mode) { + return fmt.Errorf("host PID namespace not allowed: %s", mode) + } + if mode := hostConfig.IpcMode; isHostNamespace(mode) { + return fmt.Errorf("host IPC namespace not allowed: %s", mode) + } + if mode := hostConfig.UTSMode; isHostNamespace(mode) { + return fmt.Errorf("host UTS namespace not allowed: %s", mode) + } + if mode := hostConfig.UsernsMode; isHostNamespace(mode) { + return fmt.Errorf("host user namespace mode not allowed: %s", mode) + } + } + return nil +} + +// isHostNamespace reports whether a HostConfig namespace mode string requests the host namespace. +// Docker accepts both the bare "host" value and prefixed forms like "host:..." for some modes. +func isHostNamespace(mode string) bool { + return mode == "host" || strings.HasPrefix(mode, "host:") +} + // validateBindMount validates a bind mount string in the format "source:target:options". 
func validateBindMount(allowedBindMounts []string, bind string) error { parts := strings.Split(bind, ":") diff --git a/cmd/socket-proxy/bindmount_test.go b/cmd/socket-proxy/bindmount_test.go index d71d74e..48cd255 100644 --- a/cmd/socket-proxy/bindmount_test.go +++ b/cmd/socket-proxy/bindmount_test.go @@ -201,7 +201,7 @@ func TestCheckBindMountRestrictions(t *testing.T) { t.Fatalf("failed to create request: %v", err) } - err = checkBindMountRestrictions(allowedBindMounts, req) + err = checkHostConfigRestrictions(allowedBindMounts, hostConfigPolicy{}, req) if tt.shouldPass && err != nil { t.Errorf("expected request to pass, but got error: %v", err) } @@ -211,3 +211,228 @@ func TestCheckBindMountRestrictions(t *testing.T) { }) } } + +func TestCheckHostConfigSecurity(t *testing.T) { + skipIfNotUnix(t) + + tests := []struct { + name string + policy hostConfigPolicy + hostConfig containerHostConfig + shouldPass bool + }{ + { + name: "zero policy accepts everything", + policy: hostConfigPolicy{}, + hostConfig: containerHostConfig{Privileged: true, CapAdd: []string{"SYS_ADMIN"}, NetworkMode: "host"}, + shouldPass: true, + }, + { + name: "deny privileged rejects privileged", + policy: hostConfigPolicy{DenyPrivileged: true}, + hostConfig: containerHostConfig{Privileged: true}, + shouldPass: false, + }, + { + name: "deny privileged allows non-privileged", + policy: hostConfigPolicy{DenyPrivileged: true}, + hostConfig: containerHostConfig{Privileged: false}, + shouldPass: true, + }, + { + name: "deny capadd rejects non-empty CapAdd", + policy: hostConfigPolicy{DenyCapAdd: true}, + hostConfig: containerHostConfig{CapAdd: []string{"NET_ADMIN"}}, + shouldPass: false, + }, + { + name: "deny capadd allows empty CapAdd", + policy: hostConfigPolicy{DenyCapAdd: true}, + hostConfig: containerHostConfig{CapAdd: nil}, + shouldPass: true, + }, + { + name: "deny host namespaces rejects NetworkMode=host", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: 
containerHostConfig{NetworkMode: "host"}, + shouldPass: false, + }, + { + name: "deny host namespaces rejects PidMode=host", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{PidMode: "host"}, + shouldPass: false, + }, + { + name: "deny host namespaces rejects IpcMode=host", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{IpcMode: "host"}, + shouldPass: false, + }, + { + name: "deny host namespaces rejects UTSMode=host", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{UTSMode: "host"}, + shouldPass: false, + }, + { + name: "deny host namespaces rejects UsernsMode=host", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{UsernsMode: "host"}, + shouldPass: false, + }, + { + name: "deny host namespaces rejects host: prefix", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{NetworkMode: "host:eth0"}, + shouldPass: false, + }, + { + name: "deny host namespaces allows bridge", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{NetworkMode: "bridge"}, + shouldPass: true, + }, + { + name: "deny host namespaces allows container: prefix", + policy: hostConfigPolicy{DenyHostNamespaces: true}, + hostConfig: containerHostConfig{NetworkMode: "container:abc123"}, + shouldPass: true, + }, + { + name: "all flags compose - rejects on any violation", + policy: hostConfigPolicy{DenyPrivileged: true, DenyCapAdd: true, DenyHostNamespaces: true}, + hostConfig: containerHostConfig{Privileged: false, CapAdd: []string{"SYS_PTRACE"}, NetworkMode: "bridge"}, + shouldPass: false, + }, + { + name: "all flags compose - accepts when none violated", + policy: hostConfigPolicy{DenyPrivileged: true, DenyCapAdd: true, DenyHostNamespaces: true}, + hostConfig: containerHostConfig{Privileged: false, CapAdd: nil, NetworkMode: "bridge"}, + shouldPass: true, + }, + } + + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := checkHostConfigSecurity(tt.policy, &tt.hostConfig) + if tt.shouldPass && err != nil { + t.Errorf("expected to pass, got error: %v", err) + } + if !tt.shouldPass && err == nil { + t.Errorf("expected to fail, but it passed") + } + }) + } +} + +func TestCheckHostConfigRestrictionsWithPolicy(t *testing.T) { + skipIfNotUnix(t) + + tests := []struct { + name string + method string + path string + body string + policy hostConfigPolicy + allowBindMounts []string + shouldPass bool + }{ + { + name: "container create privileged rejected", + method: "POST", + path: "/v1.40/containers/create", + body: `{"HostConfig":{"Privileged":true}}`, + policy: hostConfigPolicy{DenyPrivileged: true}, + shouldPass: false, + }, + { + name: "container create non-privileged allowed", + method: "POST", + path: "/v1.40/containers/create", + body: `{"HostConfig":{"Privileged":false}}`, + policy: hostConfigPolicy{DenyPrivileged: true}, + shouldPass: true, + }, + { + name: "container update with capadd rejected", + method: "POST", + path: "/v1.40/containers/abc/update", + body: `{"HostConfig":{"CapAdd":["SYS_ADMIN"]}}`, + policy: hostConfigPolicy{DenyCapAdd: true}, + shouldPass: false, + }, + { + name: "container create host network rejected", + method: "POST", + path: "/v1.40/containers/create", + body: `{"HostConfig":{"NetworkMode":"host"}}`, + policy: hostConfigPolicy{DenyHostNamespaces: true}, + shouldPass: false, + }, + { + name: "bind + policy: both violated picks bind error first", + method: "POST", + path: "/v1.40/containers/create", + body: `{"HostConfig":{"Binds":["/etc:/app"],"Privileged":true}}`, + policy: hostConfigPolicy{DenyPrivileged: true}, + allowBindMounts: []string{"/home"}, + shouldPass: false, + }, + { + name: "no policy, no bind mounts: parsing skipped", + method: "POST", + path: "/v1.40/containers/create", + body: `{"HostConfig":{"Privileged":true}}`, + policy: hostConfigPolicy{}, + shouldPass: true, + }, + { 
+ name: "GET request bypasses policy check", + method: "GET", + path: "/v1.40/containers/json", + body: "", + policy: hostConfigPolicy{DenyPrivileged: true, DenyCapAdd: true, DenyHostNamespaces: true}, + shouldPass: true, + }, + { + name: "swarm service does not surface host namespace fields", + method: "POST", + path: "/v1.40/services/create", + body: `{"TaskTemplate":{"ContainerSpec":{}}}`, + policy: hostConfigPolicy{DenyPrivileged: true, DenyHostNamespaces: true}, + shouldPass: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req, err := http.NewRequest(tt.method, tt.path, bytes.NewBufferString(tt.body)) + if err != nil { + t.Fatalf("failed to create request: %v", err) + } + err = checkHostConfigRestrictions(tt.allowBindMounts, tt.policy, req) + if tt.shouldPass && err != nil { + t.Errorf("expected to pass, got error: %v", err) + } + if !tt.shouldPass && err == nil { + t.Errorf("expected to fail, but it passed") + } + }) + } +} + +func TestHostConfigPolicyIsZero(t *testing.T) { + if !(hostConfigPolicy{}).isZero() { + t.Error("zero policy should be isZero") + } + if (hostConfigPolicy{DenyPrivileged: true}).isZero() { + t.Error("policy with DenyPrivileged should not be isZero") + } + if (hostConfigPolicy{DenyCapAdd: true}).isZero() { + t.Error("policy with DenyCapAdd should not be isZero") + } + if (hostConfigPolicy{DenyHostNamespaces: true}).isZero() { + t.Error("policy with DenyHostNamespaces should not be isZero") + } +} diff --git a/cmd/socket-proxy/handlehttprequest.go b/cmd/socket-proxy/handlehttprequest.go index f366135..1875210 100644 --- a/cmd/socket-proxy/handlehttprequest.go +++ b/cmd/socket-proxy/handlehttprequest.go @@ -30,9 +30,14 @@ func handleHTTPRequest(w http.ResponseWriter, r *http.Request) { return } - // check bind mount restrictions - if err := checkBindMountRestrictions(allowList.AllowedBindMounts, r); err != nil { - communicateBlockedRequest(w, r, "bind mount restriction: "+err.Error(), 
http.StatusForbidden) + // check bind mount restrictions and host config security restrictions + policy := hostConfigPolicy{ + DenyPrivileged: allowList.DenyPrivileged, + DenyCapAdd: allowList.DenyCapAdd, + DenyHostNamespaces: allowList.DenyHostNamespaces, + } + if err := checkHostConfigRestrictions(allowList.AllowedBindMounts, policy, r); err != nil { + communicateBlockedRequest(w, r, "host config restriction: "+err.Error(), http.StatusForbidden) return } diff --git a/internal/config/config.go b/internal/config/config.go index f929bf1..bc9a4e6 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -24,7 +24,22 @@ import ( "github.com/wollomatic/socket-proxy/internal/docker/client" ) -const allowedDockerLabelPrefix = "socket-proxy.allow." +const ( + allowedDockerLabelPrefix = "socket-proxy.allow." + deniedDockerLabelPrefix = "socket-proxy.deny." +) + +// denyLabels carries deny-flag values parsed from container labels. +type denyLabels struct { + Privileged bool + CapAdd bool + HostNamespaces bool +} + +// any reports whether at least one deny flag is set. 
+func (d denyLabels) any() bool { + return d.Privileged || d.CapAdd || d.HostNamespaces +} const ( defaultAllowFrom = "127.0.0.1/32" // allowed IPs to connect to the proxy @@ -40,6 +55,9 @@ const ( defaultProxySocketEndpoint = "" // empty string means no socket listener, but regular TCP listener defaultProxySocketEndpointFileMode = uint(0o600) // set the file mode of the unix socket endpoint defaultAllowBindMountFrom = "" // empty string means no bind mount restrictions + defaultDenyPrivileged = false // if true, reject container creation with HostConfig.Privileged == true + defaultDenyCapAdd = false // if true, reject container creation that adds kernel capabilities + defaultDenyHostNamespaces = false // if true, reject container creation that requests host namespaces defaultProxyContainerName = "" // socket-proxy Docker container name (empty string disables container labels for allowlists) ) @@ -67,9 +85,12 @@ type AllowListRegistry struct { } type AllowList struct { - ID string // Container ID (empty for the default allowlist) - AllowedRequests map[string][]*regexp.Regexp // map of request methods to request path regex patterns (no requests allowed if empty) - AllowedBindMounts []string // list of from portion of allowed bind mounts (all bind mounts allowed if empty) + ID string // Container ID (empty for the default allowlist) + AllowedRequests map[string][]*regexp.Regexp // map of request methods to request path regex patterns (no requests allowed if empty) + AllowedBindMounts []string // list of from portion of allowed bind mounts (all bind mounts allowed if empty) + DenyPrivileged bool // reject container creation with HostConfig.Privileged == true + DenyCapAdd bool // reject container creation with non-empty HostConfig.CapAdd + DenyHostNamespaces bool // reject container creation with host value for NetworkMode/PidMode/IpcMode/UTSMode/UsernsMode } // used for list of allowed requests @@ -101,6 +122,9 @@ func InitConfig() (*Config, error) { logLevel string 
endpointFileMode uint allowBindMountFromString string + denyPrivileged bool + denyCapAdd bool + denyHostNamespaces bool defaultAllowFromValue = defaultAllowFrom defaultAllowHealthcheckValue = defaultAllowHealthcheck defaultLogJSONValue = defaultLogJSON @@ -114,6 +138,9 @@ func InitConfig() (*Config, error) { defaultProxySocketEndpointValue = defaultProxySocketEndpoint defaultProxySocketEndpointFileModeValue = defaultProxySocketEndpointFileMode defaultAllowBindMountFromValue = defaultAllowBindMountFrom + defaultDenyPrivilegedValue = defaultDenyPrivileged + defaultDenyCapAddValue = defaultDenyCapAdd + defaultDenyHostNamespacesValue = defaultDenyHostNamespaces defaultProxyContainerNameValue = defaultProxyContainerName ) @@ -170,6 +197,21 @@ func InitConfig() (*Config, error) { if val, ok := os.LookupEnv("SP_ALLOWBINDMOUNTFROM"); ok && val != "" { defaultAllowBindMountFromValue = val } + if val, ok := os.LookupEnv("SP_DENYPRIVILEGED"); ok { + if parsedVal, err := strconv.ParseBool(val); err == nil { + defaultDenyPrivilegedValue = parsedVal + } + } + if val, ok := os.LookupEnv("SP_DENYCAPADD"); ok { + if parsedVal, err := strconv.ParseBool(val); err == nil { + defaultDenyCapAddValue = parsedVal + } + } + if val, ok := os.LookupEnv("SP_DENYHOSTNAMESPACES"); ok { + if parsedVal, err := strconv.ParseBool(val); err == nil { + defaultDenyHostNamespacesValue = parsedVal + } + } if val, ok := os.LookupEnv("SP_PROXYCONTAINERNAME"); ok && val != "" { defaultProxyContainerNameValue = val } @@ -200,6 +242,9 @@ func InitConfig() (*Config, error) { flag.StringVar(&cfg.ProxySocketEndpoint, "proxysocketendpoint", defaultProxySocketEndpointValue, "unix socket endpoint (if set, used instead of the TCP listener)") flag.UintVar(&endpointFileMode, "proxysocketendpointfilemode", defaultProxySocketEndpointFileModeValue, "set the file mode of the unix socket endpoint") flag.StringVar(&allowBindMountFromString, "allowbindmountfrom", defaultAllowBindMountFromValue, "allowed directories for bind 
mounts (comma-separated)") + flag.BoolVar(&denyPrivileged, "denyprivileged", defaultDenyPrivilegedValue, "reject container creation requests that set HostConfig.Privileged=true") + flag.BoolVar(&denyCapAdd, "denycapadd", defaultDenyCapAddValue, "reject container creation requests that add kernel capabilities (HostConfig.CapAdd)") + flag.BoolVar(&denyHostNamespaces, "denyhostnamespaces", defaultDenyHostNamespacesValue, "reject container creation requests that request host namespaces (NetworkMode/PidMode/IpcMode/UTSMode/UsernsMode=host)") flag.StringVar(&cfg.ProxyContainerName, "proxycontainername", defaultProxyContainerNameValue, "socket-proxy Docker container name") for i := range methodAllowLists { flag.Var(&methodAllowLists[i].regexStrings, "allow"+methodAllowLists[i].method, "regex for "+methodAllowLists[i].method+" requests (not set means method is not allowed)") @@ -221,6 +266,11 @@ func InitConfig() (*Config, error) { cfg.AllowLists.Default.AllowedBindMounts = allowedBindMounts } + // apply host config security deny flags to the default allowlist + cfg.AllowLists.Default.DenyPrivileged = denyPrivileged + cfg.AllowLists.Default.DenyCapAdd = denyCapAdd + cfg.AllowLists.Default.DenyHostNamespaces = denyHostNamespaces + // check listenIP and proxyPort if proxyPort < 1 || proxyPort > 65535 { return nil, errors.New("port number has to be between 1 and 65535") @@ -416,19 +466,22 @@ func (allowLists *AllowListRegistry) initByIP(ctx context.Context, dockerClient allowLists.byIP = make(map[string]AllowList) for _, cntr := range containers { - allowedRequests, allowedBindMounts, err := extractLabelData(cntr) + allowedRequests, allowedBindMounts, deny, err := extractLabelData(cntr) if err != nil { allowLists.byIP = nil return err } - if len(allowedRequests) > 0 || len(allowedBindMounts) > 0 { + if len(allowedRequests) > 0 || len(allowedBindMounts) > 0 || deny.any() { for networkID, cntrNetwork := range cntr.NetworkSettings.Networks { if 
slices.Contains(allowLists.networks, networkID) { allowList := AllowList{ - ID: cntr.ID, - AllowedRequests: allowedRequests, - AllowedBindMounts: allowedBindMounts, + ID: cntr.ID, + AllowedRequests: allowedRequests, + AllowedBindMounts: allowedBindMounts, + DenyPrivileged: deny.Privileged, + DenyCapAdd: deny.CapAdd, + DenyHostNamespaces: deny.HostNamespaces, } if len(cntrNetwork.IPAddress) > 0 { @@ -488,17 +541,20 @@ func (allowLists *AllowListRegistry) add( } cntr := containers[0] - allowedRequests, allowedBindMounts, err := extractLabelData(cntr) + allowedRequests, allowedBindMounts, deny, err := extractLabelData(cntr) if err != nil { return nil, err } var ips []string - if len(allowedRequests) > 0 || len(allowedBindMounts) > 0 { + if len(allowedRequests) > 0 || len(allowedBindMounts) > 0 || deny.any() { allowList := AllowList{ - ID: cntr.ID, - AllowedRequests: allowedRequests, - AllowedBindMounts: allowedBindMounts, + ID: cntr.ID, + AllowedRequests: allowedRequests, + AllowedBindMounts: allowedBindMounts, + DenyPrivileged: deny.Privileged, + DenyCapAdd: deny.CapAdd, + DenyHostNamespaces: deny.HostNamespaces, } allowLists.mutex.Lock() @@ -684,27 +740,45 @@ func getSocketProxyContainerSummary(socketPath, proxyContainerName string) (cont } // extract Docker container allowlist label data from the container summary -func extractLabelData(cntr container.Summary) (map[string][]*regexp.Regexp, []string, error) { +func extractLabelData(cntr container.Summary) (map[string][]*regexp.Regexp, []string, denyLabels, error) { allowedRequests := make(map[string][]*regexp.Regexp) var allowedBindMounts []string + var deny denyLabels for labelName, labelValue := range cntr.Labels { - if strings.HasPrefix(labelName, allowedDockerLabelPrefix) && labelValue != "" { + if labelValue == "" { + continue + } + if strings.HasPrefix(labelName, allowedDockerLabelPrefix) { allowSpec := strings.ToUpper(strings.TrimPrefix(labelName, allowedDockerLabelPrefix)) method, _, _ := 
strings.Cut(allowSpec, ".") if slices.Contains(supportedHTTPMethods, method) { r, err := compileRegexp(labelValue, method, "docker container label") if err != nil { - return nil, nil, err + return nil, nil, denyLabels{}, err } allowedRequests[method] = append(allowedRequests[method], r) } else if allowSpec == "BINDMOUNTFROM" { var err error allowedBindMounts, err = parseAllowedBindMounts(labelValue) if err != nil { - return nil, nil, err + return nil, nil, denyLabels{}, err } } + } else if strings.HasPrefix(labelName, deniedDockerLabelPrefix) { + denySpec := strings.ToLower(strings.TrimPrefix(labelName, deniedDockerLabelPrefix)) + parsedVal, err := strconv.ParseBool(labelValue) + if err != nil { + return nil, nil, denyLabels{}, fmt.Errorf("invalid boolean value %q for label %s", labelValue, labelName) + } + switch denySpec { + case "privileged": + deny.Privileged = parsedVal + case "capadd": + deny.CapAdd = parsedVal + case "hostnamespaces": + deny.HostNamespaces = parsedVal + } } } - return allowedRequests, allowedBindMounts, nil + return allowedRequests, allowedBindMounts, deny, nil } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 116408a..22ddaae 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -80,7 +80,7 @@ func Test_extractLabelData(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, got2, gotErr := extractLabelData(tt.cntr) + got, got2, _, gotErr := extractLabelData(tt.cntr) if gotErr != nil { if !tt.wantErr { t.Errorf("extractLabelData() failed: %v", gotErr) @@ -150,6 +150,170 @@ func TestInitConfig_AllowMethodFlagOverridesEnv(t *testing.T) { } } +func TestInitConfig_DenyHostConfigFlags(t *testing.T) { + restore := resetFlagsForTest(t, []string{ + "socket-proxy", + "-denyprivileged", + "-denycapadd", + "-denyhostnamespaces", + }) + defer restore() + + cfg, err := InitConfig() + if err != nil { + t.Fatalf("InitConfig() error = %v", err) + } + if 
!cfg.AllowLists.Default.DenyPrivileged { + t.Error("expected DenyPrivileged=true") + } + if !cfg.AllowLists.Default.DenyCapAdd { + t.Error("expected DenyCapAdd=true") + } + if !cfg.AllowLists.Default.DenyHostNamespaces { + t.Error("expected DenyHostNamespaces=true") + } +} + +func TestInitConfig_DenyHostConfigEnvVars(t *testing.T) { + t.Setenv("SP_DENYPRIVILEGED", "true") + t.Setenv("SP_DENYCAPADD", "1") + t.Setenv("SP_DENYHOSTNAMESPACES", "TRUE") + restore := resetFlagsForTest(t, []string{"socket-proxy"}) + defer restore() + + cfg, err := InitConfig() + if err != nil { + t.Fatalf("InitConfig() error = %v", err) + } + if !cfg.AllowLists.Default.DenyPrivileged { + t.Error("expected DenyPrivileged=true via env") + } + if !cfg.AllowLists.Default.DenyCapAdd { + t.Error("expected DenyCapAdd=true via env") + } + if !cfg.AllowLists.Default.DenyHostNamespaces { + t.Error("expected DenyHostNamespaces=true via env") + } +} + +func TestInitConfig_DenyHostConfigDefaultsFalse(t *testing.T) { + restore := resetFlagsForTest(t, []string{"socket-proxy"}) + defer restore() + + cfg, err := InitConfig() + if err != nil { + t.Fatalf("InitConfig() error = %v", err) + } + if cfg.AllowLists.Default.DenyPrivileged { + t.Error("expected DenyPrivileged=false by default") + } + if cfg.AllowLists.Default.DenyCapAdd { + t.Error("expected DenyCapAdd=false by default") + } + if cfg.AllowLists.Default.DenyHostNamespaces { + t.Error("expected DenyHostNamespaces=false by default") + } +} + +func Test_extractLabelData_DenyLabels(t *testing.T) { + tests := []struct { + name string + labels map[string]string + want denyLabels + wantErr bool + }{ + { + name: "no deny labels", + labels: map[string]string{"socket-proxy.allow.get": "/.*"}, + want: denyLabels{}, + }, + { + name: "deny privileged true", + labels: map[string]string{ + "socket-proxy.deny.privileged": "true", + }, + want: denyLabels{Privileged: true}, + }, + { + name: "deny capadd 1", + labels: map[string]string{ + "socket-proxy.deny.capadd": 
"1", + }, + want: denyLabels{CapAdd: true}, + }, + { + name: "deny hostnamespaces true", + labels: map[string]string{ + "socket-proxy.deny.hostnamespaces": "TRUE", + }, + want: denyLabels{HostNamespaces: true}, + }, + { + name: "all deny labels", + labels: map[string]string{ + "socket-proxy.deny.privileged": "true", + "socket-proxy.deny.capadd": "true", + "socket-proxy.deny.hostnamespaces": "true", + }, + want: denyLabels{Privileged: true, CapAdd: true, HostNamespaces: true}, + }, + { + name: "deny false is treated as not set", + labels: map[string]string{ + "socket-proxy.deny.privileged": "false", + }, + want: denyLabels{}, + }, + { + name: "invalid deny value returns error", + labels: map[string]string{ + "socket-proxy.deny.privileged": "yes", + }, + wantErr: true, + }, + { + name: "unknown deny key ignored", + labels: map[string]string{ + "socket-proxy.deny.unknown": "true", + }, + want: denyLabels{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, _, got, gotErr := extractLabelData(container.Summary{Labels: tt.labels}) + if tt.wantErr { + if gotErr == nil { + t.Fatal("expected error, got nil") + } + return + } + if gotErr != nil { + t.Fatalf("unexpected error: %v", gotErr) + } + if got != tt.want { + t.Errorf("got %+v, want %+v", got, tt.want) + } + }) + } +} + +func TestDenyLabelsAny(t *testing.T) { + if (denyLabels{}).any() { + t.Error("empty denyLabels should not be any()") + } + if !(denyLabels{Privileged: true}).any() { + t.Error("denyLabels{Privileged} should be any()") + } + if !(denyLabels{CapAdd: true}).any() { + t.Error("denyLabels{CapAdd} should be any()") + } + if !(denyLabels{HostNamespaces: true}).any() { + t.Error("denyLabels{HostNamespaces} should be any()") + } +} + func TestInitConfig_ShutdownGraceTimeTooLarge(t *testing.T) { restore := resetFlagsForTest(t, []string{ "socket-proxy",