From b4dd53ce8efc826b9e3603f55e6c07f1cc6f3dba Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Tue, 30 Jun 2026 15:23:58 -0700 Subject: [PATCH] feat(kubernetes): add cni-sidecar supervisor topology Signed-off-by: Taylor Mutch --- .../skills/debug-openshell-cluster/SKILL.md | 16 + .agents/skills/helm-dev-environment/SKILL.md | 22 +- .github/workflows/docker-build.yml | 3 + Cargo.lock | 14 + architecture/compute-runtimes.md | 18 +- crates/openshell-cni/Cargo.toml | 29 + crates/openshell-cni/src/lib.rs | 1088 +++++++++++++++++ crates/openshell-cni/src/main.rs | 9 + crates/openshell-driver-kubernetes/README.md | 17 +- .../openshell-driver-kubernetes/src/config.rs | 16 + .../openshell-driver-kubernetes/src/driver.rs | 160 ++- deploy/docker/Dockerfile.supervisor | 6 +- deploy/helm/openshell/README.md | 17 +- .../helm/openshell/ci/values-cni-sidecar.yaml | 15 + deploy/helm/openshell/skaffold.yaml | 9 + .../openshell/templates/_gateway-workload.tpl | 1 + deploy/helm/openshell/templates/_helpers.tpl | 14 + .../openshell/templates/cni-daemonset.yaml | 181 +++ .../helm/openshell/templates/deployment.yaml | 1 + deploy/helm/openshell/templates/service.yaml | 1 + .../helm/openshell/templates/statefulset.yaml | 1 + .../openshell/tests/cni_daemonset_test.yaml | 71 ++ .../openshell/tests/gateway_config_test.yaml | 16 + deploy/helm/openshell/tests/service_test.yaml | 13 + .../tests/statefulset_client_ca_test.yaml | 7 + deploy/helm/openshell/values.yaml | 29 +- docs/kubernetes/setup.mdx | 5 +- docs/kubernetes/topology.mdx | 128 +- docs/reference/gateway-config.mdx | 8 +- docs/reference/sandbox-compute-drivers.mdx | 35 +- tasks/helm.toml | 15 + tasks/scripts/docker-build-image.sh | 2 +- tasks/scripts/stage-prebuilt-binaries.sh | 12 +- tasks/test.toml | 5 + 34 files changed, 1918 insertions(+), 66 deletions(-) create mode 100644 crates/openshell-cni/Cargo.toml create mode 100644 crates/openshell-cni/src/lib.rs create mode 100644 crates/openshell-cni/src/main.rs create mode 100644 
deploy/helm/openshell/ci/values-cni-sidecar.yaml create mode 100644 deploy/helm/openshell/templates/cni-daemonset.yaml create mode 100644 deploy/helm/openshell/tests/cni_daemonset_test.yaml create mode 100644 deploy/helm/openshell/tests/service_test.yaml diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md index b859b0d54..ea32396ed 100644 --- a/.agents/skills/debug-openshell-cluster/SKILL.md +++ b/.agents/skills/debug-openshell-cluster/SKILL.md @@ -285,6 +285,18 @@ workload entrypoint PID to `OPENSHELL_ENTRYPOINT_PID_FILE` should read it for binary-scoped policy decisions; if allowed network rules are all denied, inspect that file and the network sidecar logs. +If `supervisor_topology = "cni-sidecar"` is rendered, the gateway should render +the same process container and long-running network sidecar as sidecar mode, but +there should be no `openshell-network-init` init container in sandbox pods. +Instead, the chart must install the privileged `openshell-cni` DaemonSet and the +sandbox pod should carry `openshell.ai/cni=enabled`, +`openshell.ai/network-enforcement-mode=cni-sidecar`, and +`openshell.ai/proxy-uid=<uid>` annotations. The CNI DaemonSet copies +`/openshell-cni` into the host CNI binary directory and patches an existing CNI +`.conflist`; if sandbox pods bypass network enforcement or fail during pod +network setup, inspect the DaemonSet logs, the host CNI config, and whether the +cluster actually invokes chained CNI plugins for the sandbox runtime class. + If `supervisor_topology = "proxy-pod"` is rendered, each sandbox should have a separate supervisor Deployment with one supervisor pod, a headless supervisor Service, a proxy CA Secret, and two per-sandbox NetworkPolicies. 
The agent pod @@ -305,6 +317,9 @@ Inspect all three when sandbox registration or egress enforcement fails: kubectl -n openshell get configmap openshell-config -o jsonpath='{.data.gateway\.toml}' | grep supervisor_topology kubectl -n <namespace> get pod <pod> -o jsonpath='{range .spec.initContainers[*]}{.name}{" "}{.command}{"\n"}{end}' kubectl -n <namespace> get pod <pod> -o jsonpath='{range .spec.containers[*]}{.name}{" "}{.command}{"\n"}{end}' +kubectl -n <namespace> get pod <pod> -o jsonpath='{.metadata.annotations}' +kubectl -n openshell get daemonset,pod -l app.kubernetes.io/component=cni +kubectl -n openshell logs daemonset/openshell-cni -c install-cni --tail=200 kubectl -n <namespace> logs <pod> -c openshell-network-init --tail=200 kubectl -n <namespace> logs <pod> -c openshell-supervisor-network --tail=200 kubectl -n <namespace> logs <pod> -c agent --tail=200 @@ -338,6 +353,7 @@ openshell logs | Kubernetes gateway pod crash loops | Missing secret, bad DB URL, bad TLS config | `kubectl -n openshell logs deployment/openshell -c openshell-gateway` or `kubectl -n openshell logs statefulset/openshell -c openshell-gateway` | | CLI TLS error | Local mTLS bundle does not match server cert/CA | Check `~/.config/openshell/gateways/<name>/mtls/` | | Image pull failure | Gateway or sandbox image cannot be pulled | Runtime events and image pull credentials | +| CNI-sidecar sandbox pods fail network setup | OpenShell CNI DaemonSet did not patch the node CNI conflist, cannot read pods, or the runtime class does not invoke the chained plugin | `kubectl -n openshell logs daemonset/openshell-cni -c install-cni`, chart `cni.*` values, host CNI config | | `K8s namespace not ready` with `envoy-gateway-openshell.yaml: the server could not find the requested resource` | Optional Gateway API manifest was applied without Envoy Gateway CRDs, or k3s Helm controller startup exceeded the namespace wait | Apply `deploy/kube/manifests/envoy-gateway-openshell.yaml` manually only after Envoy Gateway is installed and `grpcRoute` is enabled | ## Reporting diff --git 
a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md index a2a34f8c0..160eadf14 100644 --- a/.agents/skills/helm-dev-environment/SKILL.md +++ b/.agents/skills/helm-dev-environment/SKILL.md @@ -65,6 +65,11 @@ mise run helm:skaffold:run mise run helm:skaffold:run:sidecar ``` +**Supervisor CNI-sidecar topology** (build once and leave running): +```bash +mise run helm:skaffold:run:cni-sidecar +``` + **Supervisor proxy-pod topology** (build once and leave running): ```bash mise run helm:skaffold:run:proxy-pod @@ -73,7 +78,9 @@ mise run helm:skaffold:run:proxy-pod All Skaffold commands build the `gateway` and `supervisor` images and deploy the OpenShell Helm chart. The sidecar profile renders an `openshell-network-init` init container for nftables setup and a non-root `openshell-supervisor-network` runtime sidecar for -proxying. The proxy-pod profile renders network supervision in a separate +proxying. The cni-sidecar profile enables the privileged OpenShell CNI +DaemonSet and uses the sidecar runtime model without the pod-local network init +container. The proxy-pod profile renders network supervision in a separate supervisor Deployment with one pod and relies on Kubernetes NetworkPolicy enforcement so the agent pod can reach only its paired supervisor plus DNS. The default local k3s/k3d cluster keeps k3s's embedded NetworkPolicy controller @@ -104,6 +111,12 @@ Run the sidecar topology e2e environment: mise run e2e:kubernetes:sidecar ``` +Run the CNI-sidecar topology e2e environment: + +```bash +mise run e2e:kubernetes:cni-sidecar +``` + Run the proxy-pod topology e2e environment: ```bash @@ -176,6 +189,12 @@ For a sidecar-profile deployment: mise run helm:skaffold:delete:sidecar ``` +For a cni-sidecar-profile deployment: + +```bash +mise run helm:skaffold:delete:cni-sidecar +``` + For a proxy-pod-profile deployment: ```bash @@ -307,6 +326,7 @@ for dependencies still declared in `Chart.yaml`. 
| `deploy/helm/openshell/ci/values-high-availability.yaml` | HA test overlay (`replicaCount: 2` with external PostgreSQL Secret) | | `deploy/helm/openshell/ci/values-keycloak.yaml` | Keycloak OIDC overlay | | `deploy/helm/openshell/ci/values-sidecar.yaml` | Supervisor sidecar topology overlay for Kubernetes e2e/dev | +| `deploy/helm/openshell/ci/values-cni-sidecar.yaml` | Supervisor CNI-sidecar topology overlay for Kubernetes e2e/dev; enables the OpenShell CNI DaemonSet | | `deploy/helm/openshell/ci/values-proxy-pod.yaml` | Supervisor proxy-pod topology overlay for Kubernetes e2e/dev; requires NetworkPolicy enforcement | | `deploy/helm/openshell/ci/values-spire.yaml` | SPIFFE/SPIRE provider token grant overlay | | `deploy/helm/openshell/ci/values-spire-stack.yaml` | SPIRE hardened chart values for local dev | diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 4c92bc385..fc04e249b 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -246,6 +246,9 @@ jobs: fi mkdir -p "$stage" install -m 0755 "$found" "$stage/$binary" + if [[ "${{ inputs.component }}" == "supervisor" ]]; then + PREBUILT_ARCH="${{ matrix.arch }}" tasks/scripts/stage-prebuilt-binaries.sh cni + fi ls -lh "$stage/" - name: Build ${{ inputs.component }} image diff --git a/Cargo.lock b/Cargo.lock index ad9136c0a..a1f240fb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3624,6 +3624,20 @@ dependencies = [ "url", ] +[[package]] +name = "openshell-cni" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "libc", + "miette", + "reqwest 0.12.28", + "serde", + "serde_json", + "serde_yml", + "tempfile", +] + [[package]] name = "openshell-core" version = "0.0.0" diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index ac239bfb8..8b7faba05 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -89,19 +89,23 @@ Driver-controlled environment variables must override sandbox 
image or template values for sandbox ID, sandbox name, gateway endpoint, relay socket path, TLS paths, and command metadata. -Kubernetes can run the supervisor in the default combined topology or in a -sidecar topology. Combined mode keeps network and process supervision in the +Kubernetes can run the supervisor in combined, sidecar, cni-sidecar, or +proxy-pod topology. Combined mode keeps network and process supervision in the agent container. Sidecar mode runs network enforcement, the proxy, and gateway loopback forwarding in a dedicated sidecar, while the agent container runs only the process-supervision leaf and launches the user workload after the sidecar signals readiness. In sidecar mode, an init container performs the privileged pod-network nftables setup with `NET_ADMIN` and hands shared state ownership to the configured proxy UID; the long-running network sidecar runs as that UID and -does not keep `NET_ADMIN`. The agent container runs as the resolved sandbox -UID/GID with no added Linux capabilities. Sidecar mode preserves gateway session -and SSH behavior, but treats the process leaf as network-only: Landlock -filesystem policy, process privilege dropping, and process/binary identity -checks are not applied there. +does not keep `NET_ADMIN`. CNI-sidecar mode keeps the sidecar runtime model but +requires the privileged OpenShell CNI DaemonSet to install the pod-network rules +during CNI `ADD` using nftables or iptables. Proxy-pod mode moves network +enforcement into a paired supervisor Deployment and requires NetworkPolicy +enforcement. The agent container runs as the resolved sandbox UID/GID with no +added Linux capabilities in the alternate topologies. They preserve gateway +session and SSH behavior, but +treat the process leaf as network-only: Landlock filesystem policy, process +privilege dropping, and process/binary identity checks are not applied there. 
## Images diff --git a/crates/openshell-cni/Cargo.toml b/crates/openshell-cni/Cargo.toml new file mode 100644 index 000000000..835d1ef4c --- /dev/null +++ b/crates/openshell-cni/Cargo.toml @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "openshell-cni" +description = "OpenShell chained CNI plugin for Kubernetes sidecar network enforcement" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +base64 = { workspace = true } +miette = { workspace = true } +reqwest = { workspace = true, features = ["blocking"] } +serde = { workspace = true } +serde_json = { workspace = true } +serde_yml = { workspace = true } +tempfile = "3" + +[target.'cfg(target_os = "linux")'.dependencies] +libc = "0.2" + +[dev-dependencies] +tempfile = "3" + +[lints] +workspace = true diff --git a/crates/openshell-cni/src/lib.rs b/crates/openshell-cni/src/lib.rs new file mode 100644 index 000000000..27c2f8e1b --- /dev/null +++ b/crates/openshell-cni/src/lib.rs @@ -0,0 +1,1088 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use base64::Engine; +use miette::{Context, IntoDiagnostic, Result}; +use serde::Deserialize; +use serde_json::Value; +use std::collections::BTreeMap; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; +#[cfg(target_os = "linux")] +use std::process::Command; + +const DEFAULT_CNI_VERSION: &str = "1.0.0"; +const SUPPORTED_CNI_VERSIONS: &[&str] = &["0.3.0", "0.3.1", "0.4.0", "1.0.0"]; +const DEFAULT_KUBECONFIG_PATH: &str = "/etc/cni/net.d/openshell-cni-kubeconfig"; +const OPENSHELL_CNI_ENABLED_ANNOTATION: &str = "openshell.ai/cni"; +const OPENSHELL_CNI_PROXY_UID_ANNOTATION: &str = "openshell.ai/proxy-uid"; +const OPENSHELL_CNI_NETWORK_ENFORCEMENT_MODE_ANNOTATION: &str = + "openshell.ai/network-enforcement-mode"; +const CNI_SIDECAR_NETWORK_ENFORCEMENT_MODE: &str = "cni-sidecar"; +#[allow(dead_code)] +const OPENSHELL_TABLE: &str = "openshell_sidecar_bypass"; +#[allow(dead_code)] +const OPENSHELL_IPTABLES_CHAIN: &str = "OPENSHELL_OUTPUT"; +#[cfg(target_os = "linux")] +const NFT_SEARCH_PATHS: &[&str] = &[ + "/usr/sbin/nft", + "/sbin/nft", + "/usr/bin/nft", + "/bin/nft", + "/opt/cni/bin/nft", + "/bin/aux/nft", +]; +#[cfg(target_os = "linux")] +const IPTABLES_SEARCH_PATHS: &[&str] = &[ + "/usr/sbin/iptables", + "/sbin/iptables", + "/usr/bin/iptables", + "/bin/iptables", + "/opt/cni/bin/iptables", + "/bin/aux/iptables", +]; +#[cfg(target_os = "linux")] +const IP6TABLES_SEARCH_PATHS: &[&str] = &[ + "/usr/sbin/ip6tables", + "/sbin/ip6tables", + "/usr/bin/ip6tables", + "/bin/ip6tables", + "/opt/cni/bin/ip6tables", + "/bin/aux/ip6tables", +]; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct CniConfig { + cni_version: Option, + #[serde(default)] + prev_result: Option, + #[serde(default)] + openshell: OpenShellConfig, +} + +#[derive(Debug, Default, Deserialize)] +#[serde(rename_all = "camelCase")] +struct OpenShellConfig { + kubeconfig: Option, + log_file: Option, + #[serde(default)] + 
sandbox_namespaces: Vec, +} + +#[derive(Debug, Clone)] +struct CniEnv { + command: String, + netns: Option, + args: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct PodRef { + namespace: String, + name: String, +} + +#[derive(Debug, Deserialize)] +struct PodResponse { + metadata: PodMetadata, +} + +#[derive(Debug, Deserialize)] +struct PodMetadata { + #[serde(default)] + annotations: BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct KubeConfig { + #[serde(rename = "current-context")] + current_context: String, + clusters: Vec, + contexts: Vec, + users: Vec, +} + +#[derive(Debug, Deserialize)] +struct NamedCluster { + name: String, + cluster: ClusterConfig, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct ClusterConfig { + server: String, + certificate_authority_data: Option, + certificate_authority: Option, +} + +#[derive(Debug, Deserialize)] +struct NamedContext { + name: String, + context: ContextConfig, +} + +#[derive(Debug, Deserialize)] +struct ContextConfig { + cluster: String, + user: String, +} + +#[derive(Debug, Deserialize)] +struct NamedUser { + name: String, + user: UserConfig, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct UserConfig { + token: Option, + token_file: Option, +} + +struct Runtime; + +trait PodReader { + fn pod_annotations(&self, kubeconfig: &Path, pod: &PodRef) -> Result>; +} + +trait RuleInstaller { + fn install(&self, netns: &Path, proxy_uid: u32) -> Result; + fn cleanup(&self, netns: &Path) -> Result<()>; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct InstallReport { + backend: &'static str, +} + +pub fn run() -> Result<()> { + let mut input = String::new(); + std::io::stdin() + .read_to_string(&mut input) + .into_diagnostic()?; + let env = CniEnv::from_process(); + let runtime = Runtime; + let output = match handle_command(&input, &env, &runtime, &runtime) { + Ok(output) => output, + Err(error) => { + log_cni_error(&input, &env, &error); + 
return Err(error); + } + }; + if let Some(output) = output { + println!("{}", serde_json::to_string(&output).into_diagnostic()?); + } + Ok(()) +} + +fn log_cni_error(input: &str, env: &CniEnv, error: &miette::Report) { + let Ok(config) = serde_json::from_str::(input) else { + return; + }; + log_cni_info(&config, env, &format!("error={}", one_line_error(error))); +} + +fn log_cni_info(config: &CniConfig, env: &CniEnv, message: &str) { + let Some(log_file) = config.openshell.log_file.as_deref() else { + return; + }; + if log_file.is_empty() { + return; + } + + let pod = env.pod_ref().map_or_else( + || "-".to_string(), + |pod| format!("{}/{}", pod.namespace, pod.name), + ); + let Ok(mut file) = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(log_file) + else { + return; + }; + let _ = writeln!(file, "command={} pod={} {}", env.command, pod, message); +} + +fn one_line_error(error: &miette::Report) -> String { + format!("{error:?}") + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + .collect::>() + .join(" | ") +} + +fn handle_command( + input: &str, + env: &CniEnv, + pod_reader: &impl PodReader, + installer: &impl RuleInstaller, +) -> Result> { + match env.command.as_str() { + "VERSION" => Ok(Some(version_response())), + "DEL" => { + if let Some(netns) = env.netns.as_deref() { + let _ = installer.cleanup(netns); + } + Ok(None) + } + "ADD" => { + let config: CniConfig = serde_json::from_str(input).into_diagnostic()?; + if let Some(workload) = workload_from_config(&config, env, pod_reader)? 
{ + let netns = env.netns.as_deref().ok_or_else(|| { + miette::miette!("CNI_NETNS is required for OpenShell CNI ADD") + })?; + let report = installer.install(netns, workload.proxy_uid)?; + log_cni_info( + &config, + env, + &format!( + "status=installed backend={} proxy_uid={}", + report.backend, workload.proxy_uid + ), + ); + } + Ok(Some(pass_through_result(&config))) + } + "CHECK" => { + let config: CniConfig = serde_json::from_str(input).into_diagnostic()?; + if let Some(workload) = workload_from_config(&config, env, pod_reader)? { + let netns = env.netns.as_deref().ok_or_else(|| { + miette::miette!("CNI_NETNS is required for OpenShell CNI CHECK") + })?; + let report = installer.install(netns, workload.proxy_uid)?; + log_cni_info( + &config, + env, + &format!( + "status=installed backend={} proxy_uid={}", + report.backend, workload.proxy_uid + ), + ); + } + Ok(None) + } + other => Err(miette::miette!("unsupported CNI_COMMAND '{other}'")), + } +} + +#[derive(Debug, Clone, Copy)] +struct WorkloadConfig { + proxy_uid: u32, +} + +fn workload_from_config( + config: &CniConfig, + env: &CniEnv, + pod_reader: &impl PodReader, +) -> Result> { + let Some(pod) = env.pod_ref() else { + return Ok(None); + }; + if !config.openshell.sandbox_namespaces.is_empty() + && !config + .openshell + .sandbox_namespaces + .iter() + .any(|namespace| namespace == &pod.namespace) + { + return Ok(None); + } + let kubeconfig = config + .openshell + .kubeconfig + .as_deref() + .unwrap_or(DEFAULT_KUBECONFIG_PATH); + let annotations = pod_reader.pod_annotations(Path::new(kubeconfig), &pod)?; + if annotations + .get(OPENSHELL_CNI_ENABLED_ANNOTATION) + .map(String::as_str) + != Some("enabled") + { + return Ok(None); + } + if annotations + .get(OPENSHELL_CNI_NETWORK_ENFORCEMENT_MODE_ANNOTATION) + .map(String::as_str) + != Some(CNI_SIDECAR_NETWORK_ENFORCEMENT_MODE) + { + return Ok(None); + } + let proxy_uid = annotations + .get(OPENSHELL_CNI_PROXY_UID_ANNOTATION) + .ok_or_else(|| 
miette::miette!("OpenShell CNI pod is missing proxy UID annotation"))? + .parse::() + .into_diagnostic() + .wrap_err("invalid OpenShell CNI proxy UID annotation")?; + Ok(Some(WorkloadConfig { proxy_uid })) +} + +fn pass_through_result(config: &CniConfig) -> Value { + config.prev_result.clone().unwrap_or_else(|| { + serde_json::json!({ + "cniVersion": config.cni_version.as_deref().unwrap_or(DEFAULT_CNI_VERSION) + }) + }) +} + +fn version_response() -> Value { + serde_json::json!({ + "cniVersion": DEFAULT_CNI_VERSION, + "supportedVersions": SUPPORTED_CNI_VERSIONS + }) +} + +impl CniEnv { + fn from_process() -> Self { + Self { + command: std::env::var("CNI_COMMAND").unwrap_or_else(|_| "VERSION".to_string()), + netns: std::env::var_os("CNI_NETNS").map(PathBuf::from), + args: std::env::var("CNI_ARGS").ok(), + } + } + + fn pod_ref(&self) -> Option { + let args = self.args.as_deref()?; + let values = parse_cni_args(args); + let namespace = values.get("K8S_POD_NAMESPACE")?.to_string(); + let name = values.get("K8S_POD_NAME")?.to_string(); + Some(PodRef { namespace, name }) + } +} + +fn parse_cni_args(args: &str) -> BTreeMap<&str, &str> { + args.split(';') + .filter_map(|part| part.split_once('=')) + .collect() +} + +impl PodReader for Runtime { + fn pod_annotations(&self, kubeconfig: &Path, pod: &PodRef) -> Result> { + let client = KubeApiClient::from_kubeconfig(kubeconfig)?; + client.pod_annotations(pod) + } +} + +struct KubeApiClient { + server: String, + token: String, + client: reqwest::blocking::Client, +} + +impl KubeApiClient { + fn from_kubeconfig(path: &Path) -> Result { + let contents = std::fs::read_to_string(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read kubeconfig {}", path.display()))?; + let kubeconfig: KubeConfig = serde_yml::from_str(&contents) + .into_diagnostic() + .wrap_err("invalid kubeconfig")?; + let context = kubeconfig + .contexts + .iter() + .find(|context| context.name == kubeconfig.current_context) + .ok_or_else(|| 
miette::miette!("current kubeconfig context not found"))?; + let cluster = kubeconfig + .clusters + .iter() + .find(|cluster| cluster.name == context.context.cluster) + .ok_or_else(|| miette::miette!("current kubeconfig cluster not found"))?; + let user = kubeconfig + .users + .iter() + .find(|user| user.name == context.context.user) + .ok_or_else(|| miette::miette!("current kubeconfig user not found"))?; + let token = match (&user.user.token, &user.user.token_file) { + (Some(token), _) => token.clone(), + (None, Some(token_file)) => std::fs::read_to_string(token_file) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read kubeconfig token file {token_file}"))? + .trim() + .to_string(), + (None, None) => { + return Err(miette::miette!( + "kubeconfig user must contain token or token-file" + )); + } + }; + let mut builder = reqwest::blocking::Client::builder(); + if let Some(ca) = cluster_certificate_authority(path, &cluster.cluster)? { + builder = builder.add_root_certificate(ca); + } + let client = builder.build().into_diagnostic()?; + Ok(Self { + server: cluster.cluster.server.trim_end_matches('/').to_string(), + token, + client, + }) + } + + fn pod_annotations(&self, pod: &PodRef) -> Result> { + let url = format!( + "{}/api/v1/namespaces/{}/pods/{}", + self.server, pod.namespace, pod.name + ); + let response = self + .client + .get(url) + .bearer_auth(&self.token) + .send() + .into_diagnostic() + .wrap_err("failed to query Kubernetes API for pod annotations")?; + if !response.status().is_success() { + return Err(miette::miette!( + "Kubernetes API returned {} while reading pod {}/{}", + response.status(), + pod.namespace, + pod.name + )); + } + let pod = response.json::().into_diagnostic()?; + Ok(pod.metadata.annotations) + } +} + +fn cluster_certificate_authority( + kubeconfig_path: &Path, + cluster: &ClusterConfig, +) -> Result> { + if let Some(data) = cluster.certificate_authority_data.as_deref() { + let pem = base64::engine::general_purpose::STANDARD 
+ .decode(data) + .into_diagnostic() + .wrap_err("invalid kubeconfig certificate-authority-data")?; + return Ok(Some( + reqwest::Certificate::from_pem(&pem).into_diagnostic()?, + )); + } + if let Some(path) = cluster.certificate_authority.as_deref() { + let ca_path = if Path::new(path).is_absolute() { + PathBuf::from(path) + } else { + kubeconfig_path + .parent() + .unwrap_or_else(|| Path::new(".")) + .join(path) + }; + let pem = std::fs::read(ca_path).into_diagnostic()?; + return Ok(Some( + reqwest::Certificate::from_pem(&pem).into_diagnostic()?, + )); + } + Ok(None) +} + +impl RuleInstaller for Runtime { + fn install(&self, netns: &Path, proxy_uid: u32) -> Result { + install_rules(netns, proxy_uid) + } + + fn cleanup(&self, netns: &Path) -> Result<()> { + cleanup_rules(netns) + } +} + +#[allow(dead_code)] +fn generate_sidecar_bypass_ruleset(proxy_uid: u32, log_prefix: Option<&str>) -> String { + let log_tcp = log_prefix + .map(|p| { + format!( + "\n tcp flags syn limit rate 5/second burst 10 packets log prefix \"{p}\" flags skuid" + ) + }) + .unwrap_or_default(); + let log_udp = log_prefix + .map(|p| { + format!( + "\n meta l4proto udp limit rate 5/second burst 10 packets log prefix \"{p}\" flags skuid" + ) + }) + .unwrap_or_default(); + + format!( + r#"table inet {OPENSHELL_TABLE} {{ + chain output {{ + type filter hook output priority 0; policy accept; + + oifname "lo" accept + ct state established,related accept + meta skuid {proxy_uid} accept{log_tcp} + meta nfproto ipv4 meta l4proto tcp reject with icmp type port-unreachable + meta nfproto ipv6 meta l4proto tcp reject with icmpv6 type port-unreachable{log_udp} + meta nfproto ipv4 meta l4proto udp reject with icmp type port-unreachable + meta nfproto ipv6 meta l4proto udp reject with icmpv6 type port-unreachable + }} +}} +"# + ) +} + +#[cfg(target_os = "linux")] +fn install_rules(netns: &Path, proxy_uid: u32) -> Result { + let nft_error = if let Some(nft) = find_nft() { + match install_nft_rules(netns, 
proxy_uid, &nft) { + Ok(()) => { + return Ok(InstallReport { backend: "nft" }); + } + Err(error) => Some(one_line_error(&error)), + } + } else { + None + }; + + if let Some(iptables) = find_iptables() { + install_iptables_rules(netns, proxy_uid, &iptables).wrap_err("iptables fallback failed")?; + return Ok(InstallReport { + backend: "iptables", + }); + } + + if let Some(nft_error) = nft_error { + return Err(miette::miette!( + "nft rule installation failed and iptables was not found on node: {nft_error}" + )); + } + + Err(miette::miette!( + "neither nft nor iptables was found on node; OpenShell CNI requires a pod-network firewall backend" + )) +} + +#[cfg(target_os = "linux")] +fn install_nft_rules(netns: &Path, proxy_uid: u32, nft: &str) -> Result<()> { + let _ = run_nft_args_in_netns(netns, &nft, &["delete", "table", "inet", OPENSHELL_TABLE]); + let ruleset = generate_sidecar_bypass_ruleset(proxy_uid, Some("openshell:cni-sidecar:")); + run_nft_ruleset_in_netns(netns, &nft, &ruleset) +} + +#[cfg(not(target_os = "linux"))] +fn install_rules(netns: &Path, proxy_uid: u32) -> Result { + let _ = (netns, proxy_uid); + Err(miette::miette!( + "OpenShell CNI rule installation is supported only on Linux nodes" + )) +} + +#[cfg(target_os = "linux")] +fn cleanup_rules(netns: &Path) -> Result<()> { + if let Some(nft) = find_nft() { + let _ = cleanup_nft_rules(netns, &nft); + } + if let Some(iptables) = find_iptables() { + cleanup_iptables_rules(netns, &iptables); + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn cleanup_nft_rules(netns: &Path, nft: &str) -> Result<()> { + run_nft_args_in_netns(netns, nft, &["delete", "table", "inet", OPENSHELL_TABLE]) +} + +#[cfg(not(target_os = "linux"))] +#[allow(clippy::unnecessary_wraps)] +fn cleanup_rules(netns: &Path) -> Result<()> { + let _ = netns; + Ok(()) +} + +#[cfg(target_os = "linux")] +fn run_nft_ruleset_in_netns(netns: &Path, nft: &str, ruleset: &str) -> Result<()> { + use std::io::Write; + + let mut tmp = 
tempfile::Builder::new() + .prefix("openshell-cni-") + .suffix(".nft") + .tempfile() + .into_diagnostic()?; + tmp.write_all(ruleset.as_bytes()).into_diagnostic()?; + let ruleset_path = tmp.path().to_string_lossy().to_string(); + run_nft_args_in_netns(netns, nft, &["-f", &ruleset_path]) +} + +#[cfg(target_os = "linux")] +fn run_nft_args_in_netns(netns: &Path, nft: &str, args: &[&str]) -> Result<()> { + run_command_in_netns(netns, nft, args) +} + +#[cfg(target_os = "linux")] +fn run_command_in_netns(netns: &Path, program: &str, args: &[&str]) -> Result<()> { + use std::os::fd::AsRawFd; + use std::os::unix::process::CommandExt; + + let netns = std::fs::File::open(netns).into_diagnostic()?; + let fd = netns.as_raw_fd(); + let output = { + let mut command = Command::new(program); + command.args(args); + // SAFETY: pre_exec runs in the child after fork and before exec. setns + // only affects that child process before it executes the firewall tool. + #[allow(unsafe_code)] + unsafe { + command.pre_exec(move || { + if libc::setns(fd, libc::CLONE_NEWNET) != 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } + command.output().into_diagnostic()? 
+ }; + + if output.status.success() { + return Ok(()); + } + Err(miette::miette!( + "{} failed in CNI network namespace: {}", + program, + String::from_utf8_lossy(&output.stderr).trim() + )) +} + +#[cfg(target_os = "linux")] +fn find_nft() -> Option { + find_existing_binary(NFT_SEARCH_PATHS) +} + +#[cfg(target_os = "linux")] +fn find_iptables() -> Option { + find_existing_binary(IPTABLES_SEARCH_PATHS).map(|ipv4| IptablesBackend { + ipv4, + ipv6: find_existing_binary(IP6TABLES_SEARCH_PATHS), + }) +} + +#[cfg(target_os = "linux")] +fn find_existing_binary(paths: &[&str]) -> Option { + paths + .iter() + .find(|path| Path::new(path).is_file()) + .map(|path| (*path).to_string()) +} + +#[cfg(target_os = "linux")] +struct IptablesBackend { + ipv4: String, + ipv6: Option, +} + +#[cfg(target_os = "linux")] +fn install_iptables_rules(netns: &Path, proxy_uid: u32, backend: &IptablesBackend) -> Result<()> { + cleanup_iptables_family(netns, &backend.ipv4); + install_iptables_family(netns, &backend.ipv4, proxy_uid, "icmp-port-unreachable")?; + + if let Some(ipv6) = backend.ipv6.as_deref() { + cleanup_iptables_family(netns, ipv6); + install_iptables_family(netns, ipv6, proxy_uid, "icmp6-port-unreachable")?; + } + + Ok(()) +} + +#[cfg(target_os = "linux")] +fn cleanup_iptables_rules(netns: &Path, backend: &IptablesBackend) { + cleanup_iptables_family(netns, &backend.ipv4); + if let Some(ipv6) = backend.ipv6.as_deref() { + cleanup_iptables_family(netns, ipv6); + } +} + +#[cfg(target_os = "linux")] +fn cleanup_iptables_family(netns: &Path, iptables: &str) { + for _ in 0..16 { + if run_command_in_netns( + netns, + iptables, + &[ + "-w", + "-t", + "filter", + "-D", + "OUTPUT", + "-j", + OPENSHELL_IPTABLES_CHAIN, + ], + ) + .is_err() + { + break; + } + } + let _ = run_command_in_netns( + netns, + iptables, + &["-w", "-t", "filter", "-F", OPENSHELL_IPTABLES_CHAIN], + ); + let _ = run_command_in_netns( + netns, + iptables, + &["-w", "-t", "filter", "-X", OPENSHELL_IPTABLES_CHAIN], + ); 
+} + +#[cfg(target_os = "linux")] +fn install_iptables_family( + netns: &Path, + iptables: &str, + proxy_uid: u32, + reject_with: &str, +) -> Result<()> { + for args in generate_iptables_install_commands(proxy_uid, reject_with) { + let args = args.iter().map(String::as_str).collect::>(); + run_command_in_netns(netns, iptables, &args)?; + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn generate_iptables_install_commands(proxy_uid: u32, reject_with: &str) -> Vec> { + let uid = proxy_uid.to_string(); + [ + vec!["-w", "-t", "filter", "-N", OPENSHELL_IPTABLES_CHAIN], + vec![ + "-w", + "-t", + "filter", + "-A", + OPENSHELL_IPTABLES_CHAIN, + "-o", + "lo", + "-j", + "RETURN", + ], + vec![ + "-w", + "-t", + "filter", + "-A", + OPENSHELL_IPTABLES_CHAIN, + "-m", + "conntrack", + "--ctstate", + "ESTABLISHED,RELATED", + "-j", + "RETURN", + ], + vec![ + "-w", + "-t", + "filter", + "-A", + OPENSHELL_IPTABLES_CHAIN, + "-m", + "owner", + "--uid-owner", + &uid, + "-j", + "RETURN", + ], + vec![ + "-w", + "-t", + "filter", + "-A", + OPENSHELL_IPTABLES_CHAIN, + "-p", + "tcp", + "-j", + "REJECT", + "--reject-with", + reject_with, + ], + vec![ + "-w", + "-t", + "filter", + "-A", + OPENSHELL_IPTABLES_CHAIN, + "-p", + "udp", + "-j", + "REJECT", + "--reject-with", + reject_with, + ], + vec![ + "-w", + "-t", + "filter", + "-I", + "OUTPUT", + "1", + "-j", + OPENSHELL_IPTABLES_CHAIN, + ], + ] + .into_iter() + .map(|args| args.into_iter().map(str::to_string).collect()) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + struct TestPods { + annotations: BTreeMap, + } + + impl PodReader for TestPods { + fn pod_annotations( + &self, + _kubeconfig: &Path, + _pod: &PodRef, + ) -> Result> { + Ok(self.annotations.clone()) + } + } + + #[derive(Default)] + struct TestInstaller { + installed: std::sync::Mutex>, + cleaned: std::sync::Mutex, + } + + impl RuleInstaller for TestInstaller { + fn install(&self, _netns: &Path, proxy_uid: u32) -> Result { + 
self.installed.lock().unwrap().push(proxy_uid); + Ok(InstallReport { backend: "test" }) + } + + fn cleanup(&self, _netns: &Path) -> Result<()> { + *self.cleaned.lock().unwrap() += 1; + Ok(()) + } + } + + fn cni_input() -> String { + serde_json::json!({ + "cniVersion": "1.0.0", + "name": "openshell", + "type": "openshell-cni", + "prevResult": { + "cniVersion": "1.0.0", + "interfaces": [] + }, + "openshell": { + "kubeconfig": "/tmp/openshell-kubeconfig", + "sandboxNamespaces": ["openshell"] + } + }) + .to_string() + } + + fn cni_input_with_log_file(log_file: &Path) -> String { + serde_json::json!({ + "cniVersion": "1.0.0", + "name": "openshell", + "type": "openshell-cni", + "openshell": { + "kubeconfig": "/tmp/openshell-kubeconfig", + "sandboxNamespaces": ["openshell"], + "logFile": log_file.to_string_lossy() + } + }) + .to_string() + } + + fn env(command: &str) -> CniEnv { + CniEnv { + command: command.to_string(), + netns: Some(PathBuf::from("/proc/1/ns/net")), + args: Some("K8S_POD_NAMESPACE=openshell;K8S_POD_NAME=sandbox-1".to_string()), + } + } + + fn openshell_annotations() -> BTreeMap { + BTreeMap::from([ + ( + OPENSHELL_CNI_ENABLED_ANNOTATION.to_string(), + "enabled".to_string(), + ), + ( + OPENSHELL_CNI_NETWORK_ENFORCEMENT_MODE_ANNOTATION.to_string(), + CNI_SIDECAR_NETWORK_ENFORCEMENT_MODE.to_string(), + ), + ( + OPENSHELL_CNI_PROXY_UID_ANNOTATION.to_string(), + "1337".to_string(), + ), + ]) + } + + #[test] + fn parses_kubernetes_cni_args() { + let pod = env("ADD").pod_ref().unwrap(); + assert_eq!(pod.namespace, "openshell"); + assert_eq!(pod.name, "sandbox-1"); + } + + #[test] + fn version_returns_supported_versions() { + let pods = TestPods { + annotations: BTreeMap::new(), + }; + let installer = TestInstaller::default(); + let output = handle_command("", &env("VERSION"), &pods, &installer) + .unwrap() + .unwrap(); + assert_eq!(output["cniVersion"], DEFAULT_CNI_VERSION); + assert!( + output["supportedVersions"] + .as_array() + .unwrap() + 
.contains(&serde_json::json!("0.3.1")) + ); + assert!( + output["supportedVersions"] + .as_array() + .unwrap() + .contains(&serde_json::json!("1.0.0")) + ); + } + + #[test] + fn add_installs_for_annotated_openshell_pod() { + let pods = TestPods { + annotations: openshell_annotations(), + }; + let installer = TestInstaller::default(); + let output = handle_command(&cni_input(), &env("ADD"), &pods, &installer) + .unwrap() + .unwrap(); + assert_eq!(output["interfaces"], serde_json::json!([])); + assert_eq!(*installer.installed.lock().unwrap(), vec![1337]); + } + + #[test] + fn add_passes_through_non_openshell_pod() { + let pods = TestPods { + annotations: BTreeMap::new(), + }; + let installer = TestInstaller::default(); + let output = handle_command(&cni_input(), &env("ADD"), &pods, &installer) + .unwrap() + .unwrap(); + assert_eq!(output["interfaces"], serde_json::json!([])); + assert!(installer.installed.lock().unwrap().is_empty()); + } + + #[test] + fn add_passes_through_unconfigured_namespace_without_api_lookup() { + struct FailingPods; + + impl PodReader for FailingPods { + fn pod_annotations( + &self, + _kubeconfig: &Path, + _pod: &PodRef, + ) -> Result> { + Err(miette::miette!("unexpected API lookup")) + } + } + + let installer = TestInstaller::default(); + let mut env = env("ADD"); + env.args = Some("K8S_POD_NAMESPACE=kube-system;K8S_POD_NAME=coredns".to_string()); + let output = handle_command(&cni_input(), &env, &FailingPods, &installer) + .unwrap() + .unwrap(); + assert_eq!(output["interfaces"], serde_json::json!([])); + assert!(installer.installed.lock().unwrap().is_empty()); + } + + #[test] + fn del_cleans_when_netns_available() { + let pods = TestPods { + annotations: openshell_annotations(), + }; + let installer = TestInstaller::default(); + handle_command("", &env("DEL"), &pods, &installer).unwrap(); + assert_eq!(*installer.cleaned.lock().unwrap(), 1); + } + + #[test] + fn sidecar_ruleset_allows_proxy_uid_before_rejects() { + let ruleset = 
generate_sidecar_bypass_ruleset(1337, Some("openshell:cni-sidecar:")); + let uid_pos = ruleset.find("meta skuid 1337 accept").unwrap(); + let reject_pos = ruleset + .find("meta nfproto ipv4 meta l4proto tcp reject") + .unwrap(); + assert!(uid_pos < reject_pos); + assert!(ruleset.contains("oifname \"lo\" accept")); + assert_eq!( + ruleset + .matches("log prefix \"openshell:cni-sidecar:\"") + .count(), + 2 + ); + } + + #[cfg(target_os = "linux")] + #[test] + fn iptables_fallback_commands_allow_proxy_uid_before_rejects() { + let commands = generate_iptables_install_commands(1337, "icmp-port-unreachable"); + let rendered = commands + .iter() + .map(|command| command.join(" ")) + .collect::>() + .join("\n"); + let uid_pos = rendered.find("--uid-owner 1337 -j RETURN").unwrap(); + let reject_pos = rendered.find("-p tcp -j REJECT").unwrap(); + assert!(uid_pos < reject_pos); + assert!(rendered.contains("-A OPENSHELL_OUTPUT -o lo -j RETURN")); + assert!(rendered.contains("-I OUTPUT 1 -j OPENSHELL_OUTPUT")); + assert!(rendered.contains("--reject-with icmp-port-unreachable")); + } + + #[test] + fn cni_errors_append_to_configured_log_file() { + let dir = tempfile::tempdir().unwrap(); + let log_file = dir.path().join("openshell-cni.log"); + let error = miette::miette!( + "neither nft nor iptables was found on node; OpenShell CNI requires a pod-network firewall backend" + ); + + log_cni_error(&cni_input_with_log_file(&log_file), &env("ADD"), &error); + + let log = std::fs::read_to_string(log_file).unwrap(); + assert!(log.contains("command=ADD")); + assert!(log.contains("pod=openshell/sandbox-1")); + assert!(log.contains("neither nft nor iptables was found")); + } + + #[test] + fn add_success_appends_to_configured_log_file() { + let dir = tempfile::tempdir().unwrap(); + let log_file = dir.path().join("openshell-cni.log"); + let pods = TestPods { + annotations: openshell_annotations(), + }; + let installer = TestInstaller::default(); + + handle_command( + 
&cni_input_with_log_file(&log_file), + &env("ADD"), + &pods, + &installer, + ) + .unwrap(); + + let log = std::fs::read_to_string(log_file).unwrap(); + assert!(log.contains("command=ADD")); + assert!(log.contains("pod=openshell/sandbox-1")); + assert!(log.contains("status=installed")); + assert!(log.contains("backend=test")); + assert!(log.contains("proxy_uid=1337")); + } + + #[cfg(target_os = "linux")] + #[test] + fn nft_search_path_includes_k3s_aux_path() { + assert!(NFT_SEARCH_PATHS.contains(&"/bin/aux/nft")); + } +} diff --git a/crates/openshell-cni/src/main.rs b/crates/openshell-cni/src/main.rs new file mode 100644 index 000000000..ab2235826 --- /dev/null +++ b/crates/openshell-cni/src/main.rs @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +fn main() { + if let Err(error) = openshell_cni::run() { + eprintln!("{error:?}"); + std::process::exit(1); + } +} diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md index f26b877c9..8c4256dad 100644 --- a/crates/openshell-driver-kubernetes/README.md +++ b/crates/openshell-driver-kubernetes/README.md @@ -66,6 +66,13 @@ process supervisor defaults to network-only mode and does not apply Landlock filesystem policy, process privilege dropping, or process/binary identity checks. Network endpoint and L7 policy remain enforced by the network sidecar. +The `cni-sidecar` supervisor topology keeps the sidecar runtime model, but +removes the pod-local network init container. The driver annotates sandbox pods +for the OpenShell chained CNI plugin, and the privileged OpenShell CNI +DaemonSet installs the sidecar bypass-prevention rules during CNI `ADD` before +the workload starts. The agent container and long-running network sidecar remain +non-root with no added Linux capabilities. 
+ The `proxy-pod` supervisor topology runs network enforcement and gateway forwarding in a separate supervisor Deployment with one pod. The agent pod runs only the process-mode supervisor and reaches the supervisor through a @@ -78,11 +85,11 @@ Set `process_enforcement = "full"` in sidecar or proxy-pod topology only when you want combined-mode process/filesystem guards and accept the added agent-container permissions. -Sidecar mode uses the pod `fsGroup` to make the projected service-account token -and sandbox client TLS secret group-readable so the non-root process supervisor -can authenticate to the gateway. Treat the agent container as trusted with -respect to those in-pod gateway credentials until a narrower credential handoff -exists. +Sidecar, cni-sidecar, and proxy-pod modes use the pod `fsGroup` to make the +projected service-account token and sandbox client TLS secret group-readable so +the non-root process supervisor can authenticate to the gateway. Treat the agent +container as trusted with respect to those in-pod gateway credentials until a +narrower credential handoff exists. The driver can request a Kubernetes AppArmor profile through `app_armor_profile`. diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 68488a82d..7abc9ed56 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -65,6 +65,10 @@ pub enum SupervisorTopology { /// Run network supervision in a privileged sidecar and process supervision /// as a low-capability wrapper in the agent container. Sidecar, + /// Run network supervision in a sidecar, with pod-network rules installed + /// by the `OpenShell` chained CNI plugin instead of a privileged init + /// container. + CniSidecar, /// Run network supervision in a separate supervisor pod and process /// supervision as a low-capability wrapper in the agent pod. 
ProxyPod, @@ -75,6 +79,7 @@ impl std::fmt::Display for SupervisorTopology { match self { Self::Combined => f.write_str("combined"), Self::Sidecar => f.write_str("sidecar"), + Self::CniSidecar => f.write_str("cni-sidecar"), Self::ProxyPod => f.write_str("proxy-pod"), } } @@ -87,6 +92,7 @@ impl FromStr for SupervisorTopology { match s { "combined" => Ok(Self::Combined), "sidecar" => Ok(Self::Sidecar), + "cni-sidecar" => Ok(Self::CniSidecar), "proxy-pod" => Ok(Self::ProxyPod), other => Err(format!("unknown supervisor topology '{other}'")), } @@ -556,6 +562,16 @@ mod tests { assert_eq!(cfg.supervisor_topology.to_string(), "proxy-pod"); } + #[test] + fn serde_override_supervisor_topology_cni_sidecar() { + let json = serde_json::json!({ + "supervisor_topology": "cni-sidecar" + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + assert_eq!(cfg.supervisor_topology, SupervisorTopology::CniSidecar); + assert_eq!(cfg.supervisor_topology.to_string(), "cni-sidecar"); + } + #[test] fn serde_override_process_enforcement_full() { let json = serde_json::json!({ diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 6891e4f6f..674730507 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -1212,6 +1212,7 @@ const SIDECAR_CLIENT_TLS_MOUNT_PATH: &str = "/etc/openshell-tls/proxy/client"; /// connects here for gateway gRPC, and the sidecar forwards bytes to the real /// gateway endpoint using its own network privileges. 
const SIDECAR_GATEWAY_FORWARD_ADDR: &str = "127.0.0.1:18080"; +const SIDECAR_PROXY_PORT: u16 = 3128; const LABEL_SANDBOX_ROLE: &str = "openshell.ai/sandbox-role"; const SANDBOX_ROLE_AGENT: &str = "agent"; @@ -1225,6 +1226,14 @@ const PROXY_POD_CA_CERT_FILE: &str = "openshell-ca.pem"; const PROXY_POD_CA_KEY_FILE: &str = "openshell-ca-key.pem"; const PROXY_POD_SSH_SOCKET_FILE: &str = "/tmp/openshell/ssh.sock"; +const OPENSHELL_CNI_ENABLED_ANNOTATION: &str = "openshell.ai/cni"; +const OPENSHELL_CNI_SANDBOX_ID_ANNOTATION: &str = "openshell.ai/sandbox-id"; +const OPENSHELL_CNI_PROXY_UID_ANNOTATION: &str = "openshell.ai/proxy-uid"; +const OPENSHELL_CNI_PROXY_PORT_ANNOTATION: &str = "openshell.ai/proxy-port"; +const OPENSHELL_CNI_NETWORK_ENFORCEMENT_MODE_ANNOTATION: &str = + "openshell.ai/network-enforcement-mode"; +const CNI_SIDECAR_NETWORK_ENFORCEMENT_MODE: &str = "cni-sidecar"; + /// Build the emptyDir volume that holds the supervisor binary. /// /// The init container writes the binary here; the agent container reads it. 
@@ -1570,6 +1579,7 @@ fn supervisor_sidecar_env( template_environment: &std::collections::HashMap, spec_environment: &std::collections::HashMap, params: &SandboxPodParams<'_>, + topology: SupervisorTopology, ) -> Vec { let mut env = Vec::new(); apply_required_env( @@ -1602,7 +1612,7 @@ fn supervisor_sidecar_env( upsert_env( &mut env, openshell_core::sandbox_env::SUPERVISOR_TOPOLOGY, - "sidecar", + &topology.to_string(), ); upsert_env( &mut env, @@ -1641,6 +1651,7 @@ fn supervisor_sidecar_container( template_environment: &std::collections::HashMap, spec_environment: &std::collections::HashMap, params: &SandboxPodParams<'_>, + topology: SupervisorTopology, ) -> serde_json::Value { let mut container = serde_json::json!({ "name": SUPERVISOR_NETWORK_SIDECAR_NAME, @@ -1649,7 +1660,7 @@ fn supervisor_sidecar_container( SUPERVISOR_IMAGE_BINARY_PATH, "--mode=network", ], - "env": supervisor_sidecar_env(template_environment, spec_environment, params), + "env": supervisor_sidecar_env(template_environment, spec_environment, params, topology), "securityContext": { "runAsUser": params.proxy_uid, "runAsGroup": params.sandbox_gid, @@ -1741,6 +1752,7 @@ fn apply_supervisor_sidecar_topology( template_environment: &std::collections::HashMap, spec_environment: &std::collections::HashMap, params: &SandboxPodParams<'_>, + install_network_init: bool, ) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; @@ -1775,12 +1787,14 @@ fn apply_supervisor_sidecar_topology( })); } - let init_containers = spec - .entry("initContainers") - .or_insert_with(|| serde_json::json!([])) - .as_array_mut(); - if let Some(init_containers) = init_containers { - init_containers.push(supervisor_network_init_container(params)); + if install_network_init { + let init_containers = spec + .entry("initContainers") + .or_insert_with(|| serde_json::json!([])) + .as_array_mut(); + if let Some(init_containers) = init_containers { + 
init_containers.push(supervisor_network_init_container(params)); + } } let Some(containers) = spec.get_mut("containers").and_then(|v| v.as_array_mut()) else { @@ -1875,7 +1889,7 @@ fn apply_supervisor_sidecar_topology( upsert_env( env, openshell_core::sandbox_env::SUPERVISOR_TOPOLOGY, - "sidecar", + &params.supervisor_topology.to_string(), ); upsert_env( env, @@ -1924,6 +1938,7 @@ fn apply_supervisor_sidecar_topology( template_environment, spec_environment, params, + params.supervisor_topology, )); } @@ -2435,7 +2450,7 @@ impl Default for SandboxPodParams<'_> { fn validate_proxy_identity(params: &SandboxPodParams<'_>) -> Result<(), KubernetesDriverError> { if matches!( params.supervisor_topology, - SupervisorTopology::Sidecar | SupervisorTopology::ProxyPod + SupervisorTopology::Sidecar | SupervisorTopology::CniSidecar | SupervisorTopology::ProxyPod ) && params.proxy_uid == params.sandbox_uid { let topology = params.supervisor_topology.to_string(); @@ -2570,6 +2585,7 @@ fn sandbox_template_to_k8s_with_gpu_requirements( .map(|(key, value)| (key.clone(), serde_json::Value::String(value.clone()))) .collect::>(); let proxy_pod_topology = params.supervisor_topology == SupervisorTopology::ProxyPod; + let cni_sidecar_topology = params.supervisor_topology == SupervisorTopology::CniSidecar; if params.provider_spiffe_enabled || proxy_pod_topology { pod_labels.insert( LABEL_MANAGED_BY.to_string(), @@ -2609,6 +2625,30 @@ fn sandbox_template_to_k8s_with_gpu_requirements( serde_json::Value::String(params.sandbox_id.to_string()), ); } + if cni_sidecar_topology { + pod_annotations.insert( + OPENSHELL_CNI_ENABLED_ANNOTATION.to_string(), + serde_json::Value::String("enabled".to_string()), + ); + if !params.sandbox_id.is_empty() { + pod_annotations.insert( + OPENSHELL_CNI_SANDBOX_ID_ANNOTATION.to_string(), + serde_json::Value::String(params.sandbox_id.to_string()), + ); + } + pod_annotations.insert( + OPENSHELL_CNI_PROXY_UID_ANNOTATION.to_string(), + 
serde_json::Value::String(params.proxy_uid.to_string()), + ); + pod_annotations.insert( + OPENSHELL_CNI_PROXY_PORT_ANNOTATION.to_string(), + serde_json::Value::String(SIDECAR_PROXY_PORT.to_string()), + ); + pod_annotations.insert( + OPENSHELL_CNI_NETWORK_ENFORCEMENT_MODE_ANNOTATION.to_string(), + serde_json::Value::String(CNI_SIDECAR_NETWORK_ENFORCEMENT_MODE.to_string()), + ); + } if !pod_annotations.is_empty() { metadata.insert( "annotations".to_string(), @@ -2772,7 +2812,9 @@ fn sandbox_template_to_k8s_with_gpu_requirements( if !params.client_tls_secret_name.is_empty() { let client_tls_default_mode = match params.supervisor_topology { SupervisorTopology::Combined => 0o400, - SupervisorTopology::Sidecar | SupervisorTopology::ProxyPod => 0o440, + SupervisorTopology::Sidecar + | SupervisorTopology::CniSidecar + | SupervisorTopology::ProxyPod => 0o440, }; volumes.push(serde_json::json!({ "name": "openshell-client-tls", @@ -2798,7 +2840,9 @@ fn sandbox_template_to_k8s_with_gpu_requirements( // supervisor containers run with the sandbox GID and need group-read access. 
let sa_token_default_mode = match params.supervisor_topology { SupervisorTopology::Combined => 0o400, - SupervisorTopology::Sidecar | SupervisorTopology::ProxyPod => 0o440, + SupervisorTopology::Sidecar + | SupervisorTopology::CniSidecar + | SupervisorTopology::ProxyPod => 0o440, }; volumes.push(serde_json::json!({ "name": "openshell-sa-token", @@ -2843,6 +2887,16 @@ fn sandbox_template_to_k8s_with_gpu_requirements( &template.environment, spec_environment, params, + true, + ); + } + SupervisorTopology::CniSidecar => { + apply_supervisor_sidecar_topology( + &mut result, + &template.environment, + spec_environment, + params, + false, ); } SupervisorTopology::ProxyPod => { @@ -4485,6 +4539,88 @@ mod tests { ); } + #[test] + fn cni_sidecar_topology_omits_network_init_and_adds_cni_annotations() { + let params = SandboxPodParams { + supervisor_topology: SupervisorTopology::CniSidecar, + supervisor_sideload_method: SupervisorSideloadMethod::ImageVolume, + supervisor_image: "supervisor-image:latest", + grpc_endpoint: "http://openshell-gateway.openshell.svc:8080", + sandbox_id: "sb-cni", + proxy_uid: 2200, + namespace: "default", + sandbox_uid: 1500, + sandbox_gid: 1500, + ..SandboxPodParams::default() + }; + let pod_template = sandbox_template_to_k8s( + &SandboxTemplate { + image: "agent-image:latest".to_string(), + ..SandboxTemplate::default() + }, + false, + &std::collections::HashMap::new(), + false, + &params, + ); + + let annotations = pod_template["metadata"]["annotations"].as_object().unwrap(); + assert_eq!( + annotations[OPENSHELL_CNI_ENABLED_ANNOTATION], + serde_json::json!("enabled") + ); + assert_eq!( + annotations[OPENSHELL_CNI_SANDBOX_ID_ANNOTATION], + serde_json::json!("sb-cni") + ); + assert_eq!( + annotations[OPENSHELL_CNI_PROXY_UID_ANNOTATION], + serde_json::json!("2200") + ); + assert_eq!( + annotations[OPENSHELL_CNI_PROXY_PORT_ANNOTATION], + serde_json::json!(SIDECAR_PROXY_PORT.to_string()) + ); + assert_eq!( + 
annotations[OPENSHELL_CNI_NETWORK_ENFORCEMENT_MODE_ANNOTATION], + serde_json::json!(CNI_SIDECAR_NETWORK_ENFORCEMENT_MODE) + ); + + let init_containers = pod_template["spec"] + .get("initContainers") + .and_then(|containers| containers.as_array()) + .cloned() + .unwrap_or_default(); + assert!( + !init_containers + .iter() + .any(|container| container["name"] == SUPERVISOR_NETWORK_INIT_CONTAINER_NAME) + ); + + let containers = pod_template["spec"]["containers"].as_array().unwrap(); + assert_eq!(containers.len(), 2); + let agent = containers + .iter() + .find(|container| container["name"] == "agent") + .unwrap(); + assert_eq!( + rendered_env(agent, openshell_core::sandbox_env::SUPERVISOR_TOPOLOGY), + Some("cni-sidecar") + ); + assert_eq!( + rendered_env(agent, openshell_core::sandbox_env::NETWORK_ENFORCEMENT_MODE), + Some("sidecar-nftables") + ); + let sidecar = containers + .iter() + .find(|container| container["name"] == SUPERVISOR_NETWORK_SIDECAR_NAME) + .unwrap(); + assert_eq!( + rendered_env(sidecar, openshell_core::sandbox_env::SUPERVISOR_TOPOLOGY), + Some("cni-sidecar") + ); + } + #[test] fn sidecar_topology_rejects_proxy_uid_matching_sandbox_uid() { let params = SandboxPodParams { diff --git a/deploy/docker/Dockerfile.supervisor b/deploy/docker/Dockerfile.supervisor index c760bbc89..3973993ea 100644 --- a/deploy/docker/Dockerfile.supervisor +++ b/deploy/docker/Dockerfile.supervisor @@ -10,8 +10,9 @@ # path. It also includes nftables so the Kubernetes supervisor sidecar can # install pod-namespace egress enforcement rules. # -# The Rust binary is built natively before this image build runs and staged at: +# The Rust binaries are built natively before this image build runs and staged at: # deploy/docker/.build/prebuilt-binaries//openshell-sandbox +# deploy/docker/.build/prebuilt-binaries//openshell-cni # # Use tasks/scripts/docker-build-image.sh supervisor (or `mise run build:docker:supervisor`) # to stage the binary and build the image in one step. 
CI builds the binary @@ -23,12 +24,13 @@ FROM alpine:3.22 AS supervisor ARG TARGETARCH -RUN apk add --no-cache nftables iptables iptables-legacy +RUN apk add --no-cache nftables iptables iptables-legacy jq # --chmod=0555 restores execute bits after the actions/upload-artifact + # download-artifact roundtrip strips them. Ownership stays root (0:0) for # Podman image-volume mounts, while world-execute lets the Kubernetes # network sidecar run this binary as the dedicated non-root proxy UID. COPY --chmod=0555 deploy/docker/.build/prebuilt-binaries/${TARGETARCH}/openshell-sandbox /openshell-sandbox +COPY --chmod=0555 deploy/docker/.build/prebuilt-binaries/${TARGETARCH}/openshell-cni /openshell-cni ENTRYPOINT ["/openshell-sandbox"] diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index 5ec262e49..175180a28 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -146,6 +146,19 @@ add `ci/values-spire.yaml` to the OpenShell release values files. | certManager.enabled | bool | `false` | Create cert-manager Issuer and Certificate resources. When enabled, cert-manager owns TLS and the chart runs a JWT-only certgen hook to create the sandbox JWT signing Secret that cert-manager does not manage. | | certManager.serverDnsNames | list | `["openshell","openshell.openshell.svc","openshell.openshell.svc.cluster.local","localhost","openshell.localhost","*.openshell.localhost","host.docker.internal"]` | DNS SANs on the cert-manager-issued server certificate. | | certManager.serverIpAddresses | list | `["127.0.0.1"]` | IP SANs on the cert-manager-issued server certificate. | +| cni.affinity | object | `{}` | | +| cni.binDir | string | `"/opt/cni/bin"` | Host CNI binary directory. | +| cni.confDir | string | `"/etc/cni/net.d"` | Host CNI config directory. | +| cni.configFile | string | `""` | Host CNI conflist filename patched by the installer. Empty selects the first non-OpenShell .conflist. 
| +| cni.enabled | bool | `false` | Install the OpenShell chained CNI plugin with a privileged node DaemonSet. Required when supervisor.topology is "cni-sidecar". | +| cni.image.pullPolicy | string | `""` | CNI installer image pull policy. Empty uses supervisor.image.pullPolicy, then image.pullPolicy. | +| cni.image.repository | string | `""` | CNI installer image repository. Empty uses supervisor.image.repository. | +| cni.image.tag | string | `""` | CNI installer image tag. Empty uses supervisor.image.tag, then chart appVersion. | +| cni.logFile | string | `"/var/log/openshell-cni.log"` | Host log file written by the OpenShell CNI plugin and tailed by the installer DaemonSet. | +| cni.logLevel | string | `"info"` | Log level passed to the OpenShell CNI plugin. | +| cni.nodeSelector | object | `{}` | | +| cni.resources | object | `{}` | | +| cni.tolerations | list | `[]` | | | fullnameOverride | string | `""` | Override the full generated resource name. | | grpcRoute.enabled | bool | `false` | Create a Gateway API GRPCRoute for the gateway service. | | grpcRoute.gateway.className | string | `"eg"` | GatewayClass to reference. Envoy Gateway installs one named "eg". | @@ -237,9 +250,9 @@ add `ci/values-spire.yaml` to the OpenShell release values files. | supervisor.image.repository | string | `"ghcr.io/nvidia/openshell/supervisor"` | Supervisor image repository. | | supervisor.image.tag | string | `""` | Supervisor image tag. Defaults to the chart appVersion when empty. | | supervisor.processEnforcement | string | `"network-only"` | Process/filesystem controls applied by the agent process supervisor in non-combined topologies. "network-only" keeps the low-permission agent shape; "full" grants combined-mode process/filesystem controls. | -| supervisor.proxyUid | int | `1337` | UID for the long-running network sidecar or proxy supervisor pod. In sidecar topology, the network init container installs nftables rules that exempt this UID. 
| +| supervisor.proxyUid | int | `1337` | UID for the long-running network sidecar or proxy supervisor pod. In sidecar topology, the network init container installs nftables rules that exempt this UID. In cni-sidecar topology, the CNI plugin installs equivalent nftables or iptables rules. | | supervisor.sideloadMethod | string | `""` | How the supervisor binary is delivered into sandbox pods. Empty (default) = auto-detect from cluster version: K8s >= v1.35 -> "image-volume" (ImageVolume enabled by default; GA in v1.36) K8s < v1.35 -> "init-container" (copies via init container + emptyDir) On K8s v1.33-v1.34 with the ImageVolume feature gate manually enabled, set this to "image-volume" explicitly. | -| supervisor.topology | string | `"combined"` | Supervisor pod topology for Kubernetes sandboxes. "combined" runs the current single supervisor container in the agent pod. "sidecar" runs network enforcement in a dedicated sidecar and the process supervisor as a low-capability wrapper in the agent container. "proxy-pod" runs network enforcement in a separate supervisor Deployment and restricts the agent pod to that supervisor through NetworkPolicy. | +| supervisor.topology | string | `"combined"` | Supervisor pod topology for Kubernetes sandboxes. "combined" runs the current single supervisor container in the agent pod. "sidecar" runs network enforcement in a dedicated sidecar and the process supervisor as a low-capability wrapper in the agent container. "cni-sidecar" keeps the sidecar runtime model but installs pod-network rules through the OpenShell CNI plugin. "proxy-pod" runs network enforcement in a separate supervisor Deployment and restricts the agent pod to that supervisor through NetworkPolicy. | | tolerations | list | `[]` | Tolerations for the gateway pod. | | workload.allowMultiReplicaStatefulSet | bool | `false` | Allow replicaCount > 1 while rendering a StatefulSet. 
Prefer workload.kind=deployment for external database-backed multi-replica gateways; this override exists for operators who explicitly require StatefulSet identity or storage semantics. | | workload.kind | string | `"statefulset"` | Gateway workload controller kind. Use `statefulset` for the default SQLite database, or `deployment` when server.externalDbSecret points at an external database. | diff --git a/deploy/helm/openshell/ci/values-cni-sidecar.yaml b/deploy/helm/openshell/ci/values-cni-sidecar.yaml new file mode 100644 index 000000000..fb649fa6d --- /dev/null +++ b/deploy/helm/openshell/ci/values-cni-sidecar.yaml @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# CI/dev overlay for exercising the Kubernetes CNI sidecar topology. +# This topology requires the OpenShell chained CNI plugin to be installed on +# every node before sandbox pods are created. + +cni: + enabled: true + # k3s configures containerd with k3s-specific CNI paths. + binDir: /bin + confDir: /var/lib/rancher/k3s/agent/etc/cni/net.d + +supervisor: + topology: cni-sidecar diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml index cf99be69a..2d652bc6f 100644 --- a/deploy/helm/openshell/skaffold.yaml +++ b/deploy/helm/openshell/skaffold.yaml @@ -126,6 +126,10 @@ deploy: # includes its embedded network policy controller; if you replace the # CNI, install a policy-enforcing CNI before deploying this profile. #- ci/values-proxy-pod.yaml + # To exercise CNI sidecar topology, use the cni-sidecar Skaffold + # profile against a cluster where the OpenShell CNI DaemonSet can + # patch the node CNI conflist. 
+ #- ci/values-cni-sidecar.yaml # To test multi-replica external PostgreSQL behavior: #- ci/values-high-availability.yaml setValueTemplates: @@ -144,3 +148,8 @@ profiles: - op: add path: /deploy/helm/releases/0/valuesFiles/- value: ci/values-proxy-pod.yaml + - name: cni-sidecar + patches: + - op: add + path: /deploy/helm/releases/0/valuesFiles/- + value: ci/values-cni-sidecar.yaml diff --git a/deploy/helm/openshell/templates/_gateway-workload.tpl b/deploy/helm/openshell/templates/_gateway-workload.tpl index 5931047e5..e9783a88c 100644 --- a/deploy/helm/openshell/templates/_gateway-workload.tpl +++ b/deploy/helm/openshell/templates/_gateway-workload.tpl @@ -17,6 +17,7 @@ metadata: {{- end }} labels: {{- include "openshell.labels" . | nindent 4 }} + app.kubernetes.io/component: gateway {{- with .Values.podLabels }} {{- toYaml . | nindent 4 }} {{- end }} diff --git a/deploy/helm/openshell/templates/_helpers.tpl b/deploy/helm/openshell/templates/_helpers.tpl index 5872dc404..72f4aa646 100644 --- a/deploy/helm/openshell/templates/_helpers.tpl +++ b/deploy/helm/openshell/templates/_helpers.tpl @@ -86,6 +86,20 @@ the supervisor and gateway images stay in sync across releases. {{- printf "%s:%s" .Values.supervisor.image.repository (.Values.supervisor.image.tag | default .Chart.AppVersion) }} {{- end }} +{{/* +CNI installer image reference. Defaults to the supervisor image because the +supervisor image carries both openshell-sandbox and openshell-cni. 
+*/}} +{{- define "openshell.cniImage" -}} +{{- $repository := .Values.cni.image.repository | default .Values.supervisor.image.repository -}} +{{- $tag := .Values.cni.image.tag | default .Values.supervisor.image.tag | default .Chart.AppVersion -}} +{{- printf "%s:%s" $repository $tag }} +{{- end }} + +{{- define "openshell.cniImagePullPolicy" -}} +{{- .Values.cni.image.pullPolicy | default .Values.supervisor.image.pullPolicy | default .Values.image.pullPolicy -}} +{{- end }} + {{/* Namespaced Issuer (selfSigned) for cert-manager CA bootstrap. */}} diff --git a/deploy/helm/openshell/templates/cni-daemonset.yaml b/deploy/helm/openshell/templates/cni-daemonset.yaml new file mode 100644 index 000000000..b19596db4 --- /dev/null +++ b/deploy/helm/openshell/templates/cni-daemonset.yaml @@ -0,0 +1,181 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +{{- if and (eq (.Values.supervisor.topology | default "combined") "cni-sidecar") (not .Values.cni.enabled) -}} +{{- fail "supervisor.topology=cni-sidecar requires cni.enabled=true so the OpenShell chained CNI plugin is installed on every node" -}} +{{- end }} +{{- if .Values.cni.enabled }} +{{- $sandboxNamespace := .Values.server.sandboxNamespace | default .Release.Namespace }} +{{- $cniLogFile := .Values.cni.logFile | default "/var/log/openshell-cni.log" }} +{{- $cniLogDir := dir $cniLogFile }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "openshell.fullname" . }}-cni + labels: + {{- include "openshell.labels" . | nindent 4 }} + app.kubernetes.io/component: cni +spec: + selector: + matchLabels: + {{- include "openshell.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: cni + template: + metadata: + labels: + {{- include "openshell.selectorLabels" . 
| nindent 8 }} + app.kubernetes.io/component: cni + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ include "openshell.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: install-cni + image: {{ include "openshell.cniImage" . | quote }} + imagePullPolicy: {{ include "openshell.cniImagePullPolicy" . | quote }} + command: + - sh + - -ec + - | + bin_dir="/host{{ .Values.cni.binDir }}" + conf_dir="/host{{ .Values.cni.confDir }}" + host_conf_dir="{{ .Values.cni.confDir }}" + log_file="{{ $cniLogFile }}" + host_log_file="/host${log_file}" + kubeconfig="${conf_dir}/openshell-cni-kubeconfig" + token_file="${conf_dir}/openshell-cni-token" + ca_file="${conf_dir}/openshell-cni-ca.crt" + plugin_config="${conf_dir}/openshell-cni-plugin.json" + mkdir -p "${bin_dir}" "${conf_dir}" + touch "${host_log_file}" + chmod 0644 "${host_log_file}" + install -m 0755 /openshell-cni "${bin_dir}/openshell-cni" + cp /var/run/secrets/kubernetes.io/serviceaccount/ca.crt "${ca_file}" + cp /var/run/secrets/kubernetes.io/serviceaccount/token "${token_file}" + chmod 0600 "${token_file}" + cat > "${kubeconfig}" < "${plugin_config}" <&2 + exit 1 + fi + if [ ! -f "${target}.openshell-backup" ]; then + cp "${target}" "${target}.openshell-backup" + fi + tmp="$(mktemp "${conf_dir}/openshell-cni.XXXXXX")" + jq --slurpfile openshell "${plugin_config}" ' + .plugins = ((.plugins // []) | map(select(.type != "openshell-cni")) + [$openshell[0]]) + ' "${target}" > "${tmp}" + mv "${tmp}" "${target}" + echo "OpenShell CNI installed; plugin log ${log_file}; firewall backend selected during CNI ADD" + tail -n 0 -F "${host_log_file}" & + tail_pid="$!" 
+ trap 'kill "${tail_pid}" 2>/dev/null || true; exit 0' INT TERM + trap 'kill "${tail_pid}" 2>/dev/null || true' EXIT + while true; do + cp /var/run/secrets/kubernetes.io/serviceaccount/token "${token_file}" + chmod 0600 "${token_file}" + sleep 300 + done + securityContext: + privileged: true + allowPrivilegeEscalation: true + lifecycle: + preStop: + exec: + command: + - sh + - -ec + - | + conf_dir="/host{{ .Values.cni.confDir }}" + for target in "${conf_dir}"/*.conflist; do + [ -f "${target}" ] || continue + tmp="$(mktemp "${conf_dir}/openshell-cni-cleanup.XXXXXX")" + jq ' + if .plugins then + .plugins = (.plugins | map(select(.type != "openshell-cni"))) + else + . + end + ' "${target}" > "${tmp}" + mv "${tmp}" "${target}" + done + rm -f \ + "${conf_dir}/openshell-cni-ca.crt" \ + "${conf_dir}/openshell-cni-kubeconfig" \ + "${conf_dir}/openshell-cni-plugin.json" \ + "${conf_dir}/openshell-cni-token" + volumeMounts: + - name: cni-bin + mountPath: /host{{ .Values.cni.binDir }} + - name: cni-conf + mountPath: /host{{ .Values.cni.confDir }} + - name: cni-log + mountPath: /host{{ $cniLogDir }} + {{- with .Values.cni.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: cni-bin + hostPath: + path: {{ .Values.cni.binDir | quote }} + type: DirectoryOrCreate + - name: cni-conf + hostPath: + path: {{ .Values.cni.confDir | quote }} + type: Directory + - name: cni-log + hostPath: + path: {{ $cniLogDir | quote }} + type: DirectoryOrCreate + {{- with .Values.cni.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.cni.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.cni.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/openshell/templates/deployment.yaml b/deploy/helm/openshell/templates/deployment.yaml index e93797937..de9ae012d 100644 --- a/deploy/helm/openshell/templates/deployment.yaml +++ b/deploy/helm/openshell/templates/deployment.yaml @@ -13,6 +13,7 @@ spec: selector: matchLabels: {{- include "openshell.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: gateway template: {{- include "openshell.gatewayPodTemplate" . | nindent 4 }} {{- end }} diff --git a/deploy/helm/openshell/templates/service.yaml b/deploy/helm/openshell/templates/service.yaml index ebad42eab..921eec4ee 100644 --- a/deploy/helm/openshell/templates/service.yaml +++ b/deploy/helm/openshell/templates/service.yaml @@ -26,3 +26,4 @@ spec: {{- end }} selector: {{- include "openshell.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: gateway diff --git a/deploy/helm/openshell/templates/statefulset.yaml b/deploy/helm/openshell/templates/statefulset.yaml index 30571f80b..205f9a8da 100644 --- a/deploy/helm/openshell/templates/statefulset.yaml +++ b/deploy/helm/openshell/templates/statefulset.yaml @@ -14,6 +14,7 @@ spec: selector: matchLabels: {{- include "openshell.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: gateway template: {{- include "openshell.gatewayPodTemplate" . | nindent 4 }} volumeClaimTemplates: diff --git a/deploy/helm/openshell/tests/cni_daemonset_test.yaml b/deploy/helm/openshell/tests/cni_daemonset_test.yaml new file mode 100644 index 000000000..d54f05ed3 --- /dev/null +++ b/deploy/helm/openshell/tests/cni_daemonset_test.yaml @@ -0,0 +1,71 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +suite: cni daemonset +templates: + - templates/cni-daemonset.yaml +tests: + - it: does not render by default + template: templates/cni-daemonset.yaml + asserts: + - hasDocuments: + count: 0 + + - it: renders privileged cni installer daemonset when enabled + template: templates/cni-daemonset.yaml + set: + cni.enabled: true + asserts: + - isKind: + of: DaemonSet + - equal: + path: spec.template.spec.hostNetwork + value: true + - equal: + path: spec.template.spec.containers[0].securityContext.privileged + value: true + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cni-bin + mountPath: /host/opt/cni/bin + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cni-conf + mountPath: /host/etc/cni/net.d + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cni-log + mountPath: /host/var/log + - matchRegex: + path: spec.template.spec.containers[0].command[2] + pattern: '"sandboxNamespaces": \["NAMESPACE"\]' + - matchRegex: + path: spec.template.spec.containers[0].command[2] + pattern: '"logFile": "\$\{log_file\}"' + - matchRegex: + path: spec.template.spec.containers[0].command[2] + pattern: "firewall backend selected during CNI ADD" + - matchRegex: + path: spec.template.spec.containers[0].lifecycle.preStop.exec.command[2] + pattern: 'select\(\.type != "openshell-cni"\)' + + - it: scopes cni annotation lookup to explicit sandbox namespace + template: templates/cni-daemonset.yaml + set: + cni.enabled: true + server.sandboxNamespace: agents + asserts: + - matchRegex: + path: spec.template.spec.containers[0].command[2] + pattern: '"sandboxNamespaces": \["agents"\]' + + - it: fails cni-sidecar topology when cni installer is disabled + template: templates/cni-daemonset.yaml + set: + supervisor.topology: cni-sidecar + asserts: + - failedTemplate: + errorMessage: supervisor.topology=cni-sidecar requires cni.enabled=true so the OpenShell 
chained CNI plugin is installed on every node diff --git a/deploy/helm/openshell/tests/gateway_config_test.yaml b/deploy/helm/openshell/tests/gateway_config_test.yaml index 1619b8ffb..3130a1e34 100644 --- a/deploy/helm/openshell/tests/gateway_config_test.yaml +++ b/deploy/helm/openshell/tests/gateway_config_test.yaml @@ -25,6 +25,9 @@ tests: - equal: path: kind value: StatefulSet + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/component"] + value: gateway - it: treats a null workload map as the default StatefulSet template: templates/statefulset.yaml @@ -117,6 +120,16 @@ tests: path: data["gateway.toml"] pattern: '(?ms)\[openshell\.drivers\.kubernetes\].*?process_enforcement\s*=\s*"network-only"' + - it: renders cni-sidecar supervisor topology under [openshell.drivers.kubernetes] + template: templates/gateway-config.yaml + set: + cni.enabled: true + supervisor.topology: cni-sidecar + asserts: + - matchRegex: + path: data["gateway.toml"] + pattern: '(?ms)\[openshell\.drivers\.kubernetes\].*?supervisor_topology\s*=\s*"cni-sidecar"' + - it: renders proxy uid under [openshell.drivers.kubernetes] template: templates/gateway-config.yaml set: @@ -324,6 +337,9 @@ tests: - equal: path: spec.replicas value: 2 + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/component"] + value: gateway - equal: path: spec.template.spec.containers[0].name value: openshell-gateway diff --git a/deploy/helm/openshell/tests/service_test.yaml b/deploy/helm/openshell/tests/service_test.yaml new file mode 100644 index 000000000..16eca90cf --- /dev/null +++ b/deploy/helm/openshell/tests/service_test.yaml @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +suite: gateway service +templates: + - templates/service.yaml +tests: + - it: selects only gateway component pods + template: templates/service.yaml + asserts: + - equal: + path: spec.selector["app.kubernetes.io/component"] + value: gateway diff --git a/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml b/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml index a7b02310c..eaa51affc 100644 --- a/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml +++ b/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml @@ -10,6 +10,13 @@ release: namespace: my-namespace tests: + - it: labels gateway pods as gateway component + template: templates/statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["app.kubernetes.io/component"] + value: gateway + - it: mounts the server TLS secret ca.crt as client CA for built-in PKI template: templates/statefulset.yaml set: diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index ebef76737..1fbf03682 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -57,9 +57,36 @@ supervisor: processEnforcement: "network-only" # -- UID for the long-running network sidecar or proxy supervisor pod. In sidecar # topology, the network init container installs nftables rules that exempt - # this UID. + # this UID. In cni-sidecar topology, the CNI plugin installs equivalent + # nftables or iptables rules. proxyUid: 1337 +cni: + # -- Install the OpenShell chained CNI plugin with a privileged node DaemonSet. + # Required when supervisor.topology is "cni-sidecar". + enabled: false + image: + # -- CNI installer image repository. Empty uses supervisor.image.repository. + repository: "" + # -- CNI installer image pull policy. Empty uses supervisor.image.pullPolicy, then image.pullPolicy. + pullPolicy: "" + # -- CNI installer image tag. Empty uses supervisor.image.tag, then chart appVersion. 
+ tag: "" + # -- Host CNI binary directory. + binDir: /opt/cni/bin + # -- Host CNI config directory. + confDir: /etc/cni/net.d + # -- Log level passed to the OpenShell CNI plugin. + logLevel: info + # -- Host log file written by the OpenShell CNI plugin and tailed by the installer DaemonSet. + logFile: /var/log/openshell-cni.log + # -- Host CNI conflist filename patched by the installer. Empty selects the first non-OpenShell .conflist. + configFile: "" + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + # -- Image pull secrets attached to gateway and helper pods. imagePullSecrets: [] # -- Override the chart name used in generated resource names. diff --git a/docs/kubernetes/setup.mdx b/docs/kubernetes/setup.mdx index d9fd86c74..27bb801e0 100644 --- a/docs/kubernetes/setup.mdx +++ b/docs/kubernetes/setup.mdx @@ -163,6 +163,9 @@ The most commonly changed values are: | `supervisor.topology` | Sandbox pod topology. Refer to [Topology](/kubernetes/topology). | | `supervisor.processEnforcement` | Process/filesystem controls for non-combined topologies. Leave as `network-only` for the low-permission agent shape, or set `full` to keep combined-mode process/filesystem guards with added agent-container permissions. | | `supervisor.proxyUid` | Non-root UID for the long-running network sidecar or proxy supervisor pod. The UID must not match the sandbox UID. | +| `cni.enabled` | Install the privileged OpenShell CNI DaemonSet. Required when `supervisor.topology=cni-sidecar`. | +| `cni.configFile` | Existing host CNI conflist to patch. Leave empty to patch the first non-OpenShell `.conflist` in `cni.confDir`. | +| `cni.logFile` | Host log file written by the OpenShell CNI plugin and tailed by the installer DaemonSet. Defaults to `/var/log/openshell-cni.log`. 
| Use a values file for repeatable deployments: @@ -250,7 +253,7 @@ The gateway exposes `/healthz` for process liveness and `/readyz` for dependency ## Next Steps -- To choose between combined, sidecar, and proxy-pod sandbox topology, refer to [Topology](/kubernetes/topology). +- To choose between combined, sidecar, cni-sidecar, and proxy-pod sandbox topology, refer to [Topology](/kubernetes/topology). - To enable automatic certificate rotation with cert-manager, refer to [Managing Certificates](/kubernetes/managing-certificates). - To expose the gateway externally without port-forwarding, refer to [Ingress](/kubernetes/ingress). - To configure OIDC or reverse-proxy authentication, refer to [Access Control](/kubernetes/access-control). diff --git a/docs/kubernetes/topology.mdx b/docs/kubernetes/topology.mdx index 4f0e195d4..b5e22aceb 100644 --- a/docs/kubernetes/topology.mdx +++ b/docs/kubernetes/topology.mdx @@ -3,15 +3,16 @@ # SPDX-License-Identifier: Apache-2.0 title: "Kubernetes Sandbox Topology" sidebar-title: "Topology" -description: "Choose between combined, sidecar, and proxy-pod topology for Kubernetes sandbox pods." +description: "Choose between combined, sidecar, cni-sidecar, and proxy-pod topology for Kubernetes sandbox pods." keywords: "Generative AI, Cybersecurity, Kubernetes, Sandboxing, Sidecar, Network Policy, RuntimeClass" position: 2 --- Kubernetes sandbox pods can run the OpenShell supervisor in `combined`, -`sidecar`, or `proxy-pod` topology. Choose the topology based on which controls -you need inside the pod, how much privilege your cluster allows on the agent -container, and whether the cluster enforces Kubernetes NetworkPolicies. +`sidecar`, `cni-sidecar`, or `proxy-pod` topology. Choose the topology based on +which controls you need inside the pod, how much privilege your cluster allows +on the agent container, and whether the cluster can install node-level CNI or +NetworkPolicy enforcement. 
## Choose a Topology @@ -23,6 +24,7 @@ lower-privilege agent container. |---|---|---| | `combined` | You need OpenShell network, filesystem, and process controls in the sandbox workload. | The agent container carries the Linux capabilities the supervisor needs. | | `sidecar` | You need the agent container to run as non-root without added Linux capabilities, and network policy is the primary control. | Defaults to network-only process supervision unless you opt in to `processEnforcement=full`. | +| `cni-sidecar` | You need sidecar-mode behavior without a privileged sandbox-pod init container, and a cluster admin can install the OpenShell CNI plugin on every node. | Requires privileged node-level CNI installation; defaults to network-only process supervision unless you opt in to `processEnforcement=full`. | | `proxy-pod` | You need network enforcement to run outside the agent pod and your cluster enforces Kubernetes NetworkPolicies. | Requires a NetworkPolicy-enforcing CNI or controller; defaults to network-only process supervision unless you opt in to `processEnforcement=full`. | ## Privilege Model @@ -35,6 +37,9 @@ The long-running container permissions differ by topology: | `sidecar` | Agent container, process-only supervisor (`network-only`) | `sandbox_uid:sandbox_gid` | `false` | Drops `ALL` | Agent and workload run without added Linux capabilities. | | `sidecar` | Agent container, process-only supervisor (`full`) | Root supervisor | Not explicitly disabled by the driver | Adds combined-mode capabilities | Agent keeps combined-mode process/filesystem guards. | | `sidecar` | Network supervisor sidecar | `proxyUid:sandbox_gid` | `false` | Drops `ALL` | Long-running proxy sidecar is also non-root without added capabilities. | +| `cni-sidecar` | Agent container, process-only supervisor (`network-only`) | `sandbox_uid:sandbox_gid` | `false` | Drops `ALL` | Agent and workload run without added Linux capabilities. 
| +| `cni-sidecar` | Agent container, process-only supervisor (`full`) | Root supervisor | Not explicitly disabled by the driver | Adds combined-mode capabilities | Agent keeps combined-mode process/filesystem guards. | +| `cni-sidecar` | Network supervisor sidecar | `proxyUid:sandbox_gid` | `false` | Drops `ALL` | Long-running proxy sidecar is non-root; node-level CNI installs the pod network rules. | | `proxy-pod` | Agent pod container, process-only supervisor (`network-only`) | `sandbox_uid:sandbox_gid` | `false` | Drops `ALL` | Agent and workload run without added Linux capabilities in their own pod. | | `proxy-pod` | Agent pod container, process-only supervisor (`full`) | Root supervisor | Not explicitly disabled by the driver | Adds combined-mode capabilities | Agent keeps combined-mode process/filesystem guards. | | `proxy-pod` | Supervisor pod container, network proxy only | `proxyUid:sandbox_gid` | `false` | Drops `ALL` | Long-running proxy runs outside the agent pod without added capabilities. | @@ -46,6 +51,7 @@ pod: |---|---|---|---|---|---| | `combined` | Supervisor install init container | `0` | Not set | Not set | Copies the supervisor binary into the agent container volume. | | `sidecar` | Network init container | `0` | `false` | Drops `ALL`; adds `NET_ADMIN`, `NET_RAW`, `CHOWN`, and `FOWNER` | Installs pod-local nftables rules and prepares shared sidecar state. | +| `cni-sidecar` | None for network rules | N/A | N/A | N/A | The OpenShell CNI DaemonSet installs rules during pod network setup. | | `proxy-pod` | Supervisor install init container | `0` | Not set | Not set | Copies the supervisor binary into the agent pod volume. | | `proxy-pod` | Proxy CA install init container | `0:sandbox_gid` | `false` | Drops `ALL` | Copies proxy CA material into the agent pod TLS volume. | @@ -151,6 +157,67 @@ dropping, or process/binary identity checks unless you opt in to `supervisor.processEnforcement=full`. 
+## CNI Sidecar Topology + +CNI sidecar topology keeps the sidecar runtime model but moves network rule +installation out of the sandbox pod. A privileged OpenShell DaemonSet installs a +chained CNI plugin on each node. During CNI `ADD`, the plugin reads OpenShell +pod annotations and installs the sidecar bypass-prevention rules in the pod +network namespace before the workload starts. + +```mermaid +flowchart TB + Sandbox["agents.x-k8s.io Sandbox"] + + subgraph Node["Kubernetes node"] + DaemonSet["OpenShell CNI DaemonSet"] + CNIPlugin["OpenShell chained CNI plugin"] + Runtime["container runtime<br/>CNI ADD / CHECK"] + + subgraph Pod["Sandbox pod"] + NetNS["pod network namespace"] + + subgraph Agent["agent container"] + ProcessSupervisor["process supervisor<br/>network-only by default"] + Workload["Agent workload"] + end + + NetworkSidecar["network supervisor sidecar<br/>proxyUid"] + end + end + + Gateway["OpenShell Gateway"] + External["External services"] + + Sandbox --> Pod + DaemonSet -->|"installs binary + patches conflist"| CNIPlugin + Runtime -->|"invokes"| CNIPlugin + CNIPlugin -->|"reads pod annotations"| Pod + CNIPlugin -->|"installs nftables or iptables rules"| NetNS + ProcessSupervisor --> Workload + Workload -->|"egress redirected on loopback"| NetworkSidecar + NetworkSidecar -->|"gateway forwarding"| Gateway + NetworkSidecar -->|"policy-enforced egress"| External +``` + +Use this topology when a cluster admin can install node-level CNI components and +you want sandbox pods to avoid the sidecar topology's privileged network init +container. + +Each node must provide a firewall backend usable from the host-executed plugin. +The plugin prefers `nft` and falls back to `iptables` when `nft` is unavailable. +The installer DaemonSet writes plugin failures to `cni.logFile` and tails that +file, so `kubectl logs daemonset/openshell-cni -c install-cni` includes CNI +`ADD` and `CHECK` errors that would otherwise only appear in pod events or node +logs. + + +`cni-sidecar` is experimental. It currently targets normal Kubernetes runtimes +first. Kata Containers and gVisor remain validation targets, because each +runtime must honor the CNI-installed pod-network rules for this topology to +provide the expected network enforcement. + + ## Proxy-Pod Topology Proxy-pod topology moves network enforcement and gateway forwarding into a @@ -218,10 +285,10 @@ agent-to-supervisor isolation policy is only declarative. ## Credential Exposure -Sidecar and proxy-pod topologies use pod `fsGroup` and group-readable projected -credentials so the non-root process supervisor can authenticate to the gateway. -This includes the projected ServiceAccount token used for sandbox token -bootstrap and the sandbox client TLS secret.
+Sidecar, cni-sidecar, and proxy-pod topologies use pod `fsGroup` and +group-readable projected credentials so the non-root process supervisor can +authenticate to the gateway. This includes the projected ServiceAccount token +used for sandbox token bootstrap and the sandbox client TLS secret. Treat the agent container as trusted with respect to those in-pod gateway credentials. Use `combined` topology when that credential exposure is not @@ -236,9 +303,11 @@ depends on the topology and runtime: when the cluster enforces NetworkPolicies. - `sidecar` is experimental with Kata Containers and is known to fail with gVisor because sidecar mode depends on pod-local network rule setup. +- `cni-sidecar` is intended to test whether CNI-installed pod-network rules can + make sidecar-style enforcement work on stricter runtime classes. Runtime classes do not re-enable OpenShell filesystem and process controls when -sidecar and proxy-pod modes use the default `network-only` process +sidecar, cni-sidecar, and proxy-pod modes use the default `network-only` process enforcement. Use RuntimeClass isolation as an additional workload boundary, not as a replacement for combined topology or `processEnforcement=full`. @@ -254,7 +323,9 @@ openshell sandbox create \ ## Enable Alternate Topologies For direct gateway TOML configuration, set the Kubernetes driver fields for -sidecar mode: +the topology you want to test. 
+ +For sidecar mode: ```toml [openshell.drivers.kubernetes] @@ -263,7 +334,7 @@ process_enforcement = "network-only" proxy_uid = 1337 ``` -Set `supervisor_topology="proxy-pod"` to use proxy-pod mode: +For proxy-pod mode: ```toml [openshell.drivers.kubernetes] @@ -272,9 +343,22 @@ process_enforcement = "network-only" proxy_uid = 1337 ``` +For CNI-sidecar mode: + +```toml +[openshell.drivers.kubernetes] +supervisor_topology = "cni-sidecar" +process_enforcement = "network-only" +proxy_uid = 1337 +``` + +Leave `supervisor_topology` unset, or set it to `combined`, to keep the +original single-container supervisor path. + `proxy_uid` must be a non-root UID and must not match the sandbox UID. In sidecar mode, the network init container exempts this UID from proxy -redirection so the sidecar can reach the gateway. In proxy-pod mode, the same +redirection so the sidecar can reach the gateway. In CNI-sidecar mode, the +OpenShell CNI plugin installs equivalent rules. In proxy-pod mode, the same value is used as the non-root UID for the proxy supervisor pod created by the Deployment. @@ -301,9 +385,23 @@ supervisor: proxyUid: 1337 ``` -Leave `supervisor_topology` unset, or set it to `combined`, to keep the -original single-container supervisor path. For Helm installs, leave -`supervisor.topology` unset or set it to `combined`. +Set `supervisor.topology=cni-sidecar` and enable the CNI installer to use CNI +sidecar mode: + +```yaml +cni: + enabled: true +supervisor: + topology: cni-sidecar + processEnforcement: network-only + proxyUid: 1337 +``` + +The OpenShell CNI installer requires privileged node access so it can copy the +plugin into the host CNI binary directory and patch the node CNI conflist. + +For Helm installs, leave `supervisor.topology` unset or set it to `combined` to +keep the original single-container supervisor path. 
## Next Steps diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index 717b049b4..3c9919b5e 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -180,8 +180,13 @@ supervisor_sideload_method = "image-volume" # "combined" runs the existing single supervisor container with full process, # filesystem, and network enforcement in the agent container. "sidecar" moves # pod-level network enforcement and gateway forwarding into a network sidecar. +# "cni-sidecar" keeps the sidecar runtime model but expects an OpenShell CNI +# plugin to install the pod-network rules before the workload starts. # "proxy-pod" moves network enforcement and gateway forwarding into a separate # supervisor Deployment and uses NetworkPolicy to force agent egress through it. +# In sidecar, cni-sidecar, and proxy-pod modes, the agent container runs +# non-root with no added Linux capabilities and process/filesystem enforcement +# is network-only. supervisor_topology = "combined" # Process/filesystem controls for non-combined topologies. "network-only" # keeps the low-permission agent shape; "full" grants combined-mode @@ -189,7 +194,8 @@ supervisor_topology = "combined" process_enforcement = "network-only" # UID used by the long-running network sidecar or proxy supervisor pod. In # sidecar topology, the network init container installs nftables rules that -# exempt this UID. +# exempt this UID. In cni-sidecar topology, the OpenShell CNI plugin installs +# equivalent nftables or iptables rules. 
proxy_uid = 1337 grpc_endpoint = "https://openshell-gateway.agents.svc:8080" ssh_socket_path = "/run/openshell/ssh.sock" diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index ff8331050..3829fcb62 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -304,32 +304,35 @@ For maintainer-level implementation details, refer to the [Kubernetes driver REA | `supervisor_image` | `supervisor.image.repository` / `supervisor.image.tag` | Set the supervisor image that provides the `openshell-sandbox` binary. | | `supervisor_image_pull_policy` | `supervisor.image.pullPolicy` | Set the Kubernetes image pull policy for the supervisor image. | | `supervisor_sideload_method` | `supervisor.sideloadMethod` | How the supervisor binary is delivered into sandbox pods. Leave empty to auto-detect from cluster version. Set to `image-volume` to mount the supervisor OCI image directly as a volume (requires Kubernetes 1.33+ with the ImageVolume feature gate; GA in 1.36), or `init-container` to copy it through an init container on older clusters. | -| `supervisor_topology` | `supervisor.topology` | Set `combined` for the default single supervisor path, `sidecar` to move pod-level network enforcement and gateway forwarding into a dedicated sidecar, or `proxy-pod` to run network enforcement in a separate supervisor Deployment with NetworkPolicy isolation. | +| `supervisor_topology` | `supervisor.topology` | Set `combined` for the default single supervisor path, `sidecar` to move pod-level network enforcement and gateway forwarding into a dedicated sidecar, `cni-sidecar` to use that sidecar runtime model with CNI-installed pod-network rules, or `proxy-pod` to run network enforcement in a separate supervisor Deployment with NetworkPolicy isolation. | | `process_enforcement` | `supervisor.processEnforcement` | Process/filesystem controls for non-combined topologies. 
`network-only` keeps the low-permission agent shape. `full` grants combined-mode process/filesystem controls to the agent process supervisor. | -| `proxy_uid` | `supervisor.proxyUid` | UID used by the long-running network sidecar or proxy supervisor pod. In `sidecar` topology, the network init container exempts this UID from proxy redirection. | +| `proxy_uid` | `supervisor.proxyUid` | UID used by the long-running network sidecar or proxy supervisor pod. In `sidecar` topology, the network init container exempts this UID from proxy redirection. In `cni-sidecar` topology, the OpenShell CNI plugin installs equivalent nftables or iptables exemptions. | | `app_armor_profile` | `server.appArmorProfile` | Set the sandbox agent container's AppArmor profile. Helm defaults this to `Unconfined` so AppArmor-enabled nodes do not block supervisor network namespace setup. Set the Helm value to an empty string to omit the field, or use `RuntimeDefault` or `Localhost/` for operator-managed profiles. | | `workspace_default_storage_size` | `server.workspaceDefaultStorageSize` | Set the default workspace PVC size for new sandboxes. | | `sa_token_ttl_secs` | `server.sandboxJwt.k8sSaTokenTtlSecs` | Set the projected ServiceAccount token TTL used for the bootstrap token exchange. | In `combined` topology, the agent container carries the Linux capabilities needed by the supervisor for network namespace setup, Landlock filesystem -policy, process privilege changes, and network policy enforcement. In `sidecar` -and `proxy-pod` topology, the agent container runs as the resolved sandbox -UID/GID with no added Linux capabilities. Sidecar mode uses a root init -container for nftables setup and a long-running non-root sidecar. Proxy-pod mode -creates a separate non-root supervisor Deployment with one pod, a headless -Service, a proxy CA Secret, and per-sandbox NetworkPolicies. The Deployment -recreates the supervisor pod if it is deleted. 
Both modes keep gateway session -and SSH behavior, but the process supervisor runs in `network-only` mode by -default: filesystem policy, process privilege dropping, and process/binary -identity checks are not applied by the process container. Set +policy, process privilege changes, and network policy enforcement. In +`sidecar`, `cni-sidecar`, and `proxy-pod` topology, the agent container runs as +the resolved sandbox UID/GID with no added Linux capabilities. Sidecar mode uses +a root init container for nftables setup and a long-running non-root sidecar. +CNI-sidecar mode keeps that sidecar runtime model but requires the privileged +OpenShell CNI DaemonSet to install nftables or iptables pod-network rules before +the workload starts. +Proxy-pod mode creates a separate non-root supervisor Deployment with one pod, a +headless Service, a proxy CA Secret, and per-sandbox NetworkPolicies. The +Deployment recreates the supervisor pod if it is deleted. All three alternate +modes keep gateway session and SSH behavior, but the process supervisor runs in +`network-only` mode by default: filesystem policy, process privilege dropping, +and process/binary identity checks are not applied by the process container. Set `process_enforcement = "full"` only when you want those combined-mode process/filesystem guards and accept the added agent-container permissions. -Sidecar mode uses pod `fsGroup` so the non-root process supervisor can read the -projected ServiceAccount token and sandbox client TLS secret required for -gateway authentication. Treat the workload container as trusted with respect to -those in-pod gateway credentials. +Sidecar, cni-sidecar, and proxy-pod modes use pod `fsGroup` so the non-root +process supervisor can read the projected ServiceAccount token and sandbox +client TLS secret required for gateway authentication. Treat the workload +container as trusted with respect to those in-pod gateway credentials. 
The Kubernetes driver creates namespaced `agents.x-k8s.io` `Sandbox` resources from the Kubernetes SIG Apps [agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox) project. It detects the served Sandbox API at runtime, caches the selected API version for the gateway process, and uses `v1beta1` when available before falling back to `v1alpha1`, so supported Agent Sandbox installations work without version-specific operator configuration. The Agent Sandbox controller turns those resources into sandbox pods and related storage. diff --git a/tasks/helm.toml b/tasks/helm.toml index a26c7ecae..afbc82676 100644 --- a/tasks/helm.toml +++ b/tasks/helm.toml @@ -65,6 +65,11 @@ description = "Run skaffold dev with proxy-pod topology; requires NetworkPolicy dir = "deploy/helm/openshell" run = "skaffold dev -p proxy-pod" +["helm:skaffold:dev:cni-sidecar"] +description = "Run skaffold dev with CNI sidecar topology; requires OpenShell CNI installer permissions on nodes" +dir = "deploy/helm/openshell" +run = "skaffold dev -p cni-sidecar" + ["helm:skaffold:run"] description = "Run skaffold run for deploy/helm/openshell (one-shot deploy)" dir = "deploy/helm/openshell" @@ -80,6 +85,11 @@ description = "Run skaffold run with proxy-pod topology; requires NetworkPolicy dir = "deploy/helm/openshell" run = "skaffold run -p proxy-pod" +["helm:skaffold:run:cni-sidecar"] +description = "Run skaffold run with CNI sidecar topology; requires OpenShell CNI installer permissions on nodes" +dir = "deploy/helm/openshell" +run = "skaffold run -p cni-sidecar" + ["helm:skaffold:delete"] description = "Run skaffold delete for deploy/helm/openshell" dir = "deploy/helm/openshell" @@ -95,6 +105,11 @@ description = "Run skaffold delete for the Kubernetes proxy-pod topology" dir = "deploy/helm/openshell" run = "skaffold delete -p proxy-pod" +["helm:skaffold:delete:cni-sidecar"] +description = "Run skaffold delete for the Kubernetes CNI sidecar topology" +dir = "deploy/helm/openshell" +run = 
"skaffold delete -p cni-sidecar" + ["helm:skaffold:diagnose"] description = "Run skaffold diagnose for deploy/helm/openshell" dir = "deploy/helm/openshell" diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh index 8570180f1..b4d79389f 100755 --- a/tasks/scripts/docker-build-image.sh +++ b/tasks/scripts/docker-build-image.sh @@ -44,7 +44,7 @@ required_prebuilt_binaries() { echo "openshell-gateway" ;; supervisor|supervisor-sideload|supervisor-output) - echo "openshell-sandbox" + echo "openshell-sandbox openshell-cni" ;; esac } diff --git a/tasks/scripts/stage-prebuilt-binaries.sh b/tasks/scripts/stage-prebuilt-binaries.sh index 8d1334b8c..f50603db0 100755 --- a/tasks/scripts/stage-prebuilt-binaries.sh +++ b/tasks/scripts/stage-prebuilt-binaries.sh @@ -9,7 +9,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" usage() { - echo "Usage: stage-prebuilt-binaries.sh " >&2 + echo "Usage: stage-prebuilt-binaries.sh " >&2 } normalize_arch() { @@ -88,7 +88,10 @@ components_for_target() { echo "gateway" ;; sandbox|supervisor|supervisor-output) - echo "supervisor" + echo "supervisor cni" + ;; + cni) + echo "cni" ;; all) echo "gateway supervisor" @@ -112,6 +115,11 @@ resolve_component() { binary=openshell-sandbox target_libc=musl ;; + cni) + crate=openshell-cni + binary=openshell-cni + target_libc=musl + ;; *) echo "unsupported binary component: $1" >&2 exit 1 diff --git a/tasks/test.toml b/tasks/test.toml index b8165fd82..4be031862 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -124,6 +124,11 @@ description = "Run Kubernetes e2e with the proxy-pod topology overlay; requires env = { OPENSHELL_E2E_KUBE_EXTRA_VALUES = "deploy/helm/openshell/ci/values-proxy-pod.yaml" } run = "e2e/rust/e2e-kubernetes.sh" +["e2e:kubernetes:cni-sidecar"] +description = "Run Kubernetes e2e with the CNI sidecar topology overlay; requires OpenShell CNI installer permissions on nodes" +env = { 
OPENSHELL_E2E_KUBE_EXTRA_VALUES = "deploy/helm/openshell/ci/values-cni-sidecar.yaml" } +run = "e2e/rust/e2e-kubernetes.sh" + ["e2e:kubernetes:db"] description = "Run Kubernetes e2e with all database backend scenarios (SQLite and external PostgreSQL with existingSecret)" env = { OPENSHELL_E2E_KUBE_DB_SCENARIOS = "1" }