From fd09c75f9a78be56c134076a0b5bfc33f2a6d25e Mon Sep 17 00:00:00 2001 From: Florent Benoit Date: Tue, 30 Jun 2026 22:32:44 +0200 Subject: [PATCH] fix(core): pin supervisor image tag to gateway version for all drivers The Podman and Kubernetes drivers defaulted the supervisor image to `:latest` via DEFAULT_SUPERVISOR_IMAGE, while the Docker driver already resolved a version-pinned tag. Extract the tag resolution logic into openshell-core so all three drivers use the same OPENSHELL_IMAGE_TAG > IMAGE_TAG > CARGO_PKG_VERSION priority chain. Closes #2068 Signed-off-by: Florent Benoit --- crates/openshell-core/src/config.rs | 90 ++++++++++++++++++- crates/openshell-driver-docker/src/lib.rs | 37 +------- crates/openshell-driver-docker/src/tests.rs | 21 ++--- .../openshell-driver-kubernetes/src/config.rs | 4 +- .../openshell-driver-kubernetes/src/main.rs | 2 +- crates/openshell-driver-podman/src/config.rs | 4 +- .../openshell-driver-podman/src/container.rs | 5 +- crates/openshell-driver-podman/src/main.rs | 6 +- docs/reference/gateway-config.mdx | 12 ++- 9 files changed, 119 insertions(+), 62 deletions(-) diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index ba6b9d401..fbd06e2bd 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -37,8 +37,50 @@ pub const DEFAULT_DOCKER_NETWORK_NAME: &str = "openshell-docker"; /// Default domain used for browser-facing sandbox service URLs. pub const DEFAULT_SERVICE_ROUTING_DOMAIN: &str = "openshell.localhost"; -/// Default OCI image for the openshell-sandbox supervisor binary. -pub const DEFAULT_SUPERVISOR_IMAGE: &str = "ghcr.io/nvidia/openshell/supervisor:latest"; +/// Default OCI repository for the supervisor image (no tag). +pub const DEFAULT_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; + +/// Return the default supervisor image reference with a version-pinned tag. 
+#[must_use] +pub fn default_supervisor_image() -> String { + format!( + "{DEFAULT_SUPERVISOR_IMAGE_REPO}:{}", + default_supervisor_image_tag() + ) +} + +fn default_supervisor_image_tag() -> String { + resolve_supervisor_image_tag( + option_env!("OPENSHELL_IMAGE_TAG"), + option_env!("IMAGE_TAG"), + env!("CARGO_PKG_VERSION"), + ) +} + +/// Resolve the supervisor image tag from build-time inputs. +/// +/// Priority: `OPENSHELL_IMAGE_TAG` > `IMAGE_TAG` > `CARGO_PKG_VERSION`. +/// Falls back to `"dev"` when the Cargo version is empty or `"0.0.0"`. +/// Replaces `+` with `-` for OCI tag compatibility. +#[must_use] +pub fn resolve_supervisor_image_tag( + openshell_image_tag: Option<&str>, + image_tag: Option<&str>, + cargo_pkg_version: &str, +) -> String { + let tag = openshell_image_tag + .filter(|tag| !tag.is_empty()) + .or_else(|| image_tag.filter(|tag| !tag.is_empty())) + .unwrap_or_else(|| { + if cargo_pkg_version.is_empty() || cargo_pkg_version == "0.0.0" { + "dev" + } else { + cargo_pkg_version + } + }); + + tag.replace('+', "-") +} /// CDI device identifier for requesting all NVIDIA GPUs. 
pub const CDI_GPU_DEVICE_ALL: &str = "nvidia.com/gpu=all"; @@ -1064,4 +1106,48 @@ mod tests { } } } + + #[test] + fn supervisor_image_tag_prefers_explicit_build_tags() { + use super::resolve_supervisor_image_tag; + assert_eq!( + resolve_supervisor_image_tag(Some("1.2.3"), Some("sha"), "0.0.0"), + "1.2.3", + ); + assert_eq!( + resolve_supervisor_image_tag(None, Some("sha"), "0.0.0"), + "sha", + ); + assert_eq!(resolve_supervisor_image_tag(None, None, "1.2.3"), "1.2.3",); + assert_eq!( + resolve_supervisor_image_tag(Some(""), Some(""), "0.0.0"), + "dev", + ); + assert_eq!( + resolve_supervisor_image_tag(Some("latest"), None, "1.2.3"), + "latest", + ); + } + + #[test] + fn supervisor_image_tag_sanitizes_build_metadata_for_oci() { + use super::resolve_supervisor_image_tag; + assert_eq!( + resolve_supervisor_image_tag(None, None, "0.0.37-dev.156+g1d3b741ee"), + "0.0.37-dev.156-g1d3b741ee", + ); + assert_eq!( + resolve_supervisor_image_tag(Some("0.0.37-dev.156+g1d3b741ee"), None, "0.0.0"), + "0.0.37-dev.156-g1d3b741ee", + ); + } + + #[test] + fn default_supervisor_image_is_version_pinned() { + use super::default_supervisor_image; + let image = default_supervisor_image(); + assert!(image.starts_with("ghcr.io/nvidia/openshell/supervisor:")); + let tag = image.rsplit_once(':').unwrap().1; + assert!(!tag.is_empty()); + } } diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index a59352018..2eb2b3cf9 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -78,55 +78,24 @@ const HOST_OPENSHELL_INTERNAL: &str = "host.openshell.internal"; const HOST_DOCKER_INTERNAL: &str = "host.docker.internal"; const DOCKER_NETWORK_DRIVER: &str = "bridge"; -/// Default image holding the Linux `openshell-sandbox` binary. 
The gateway -/// pulls this image and extracts the binary to a host-side cache when no -/// explicit `supervisor_bin`, configured `supervisor_image`, sibling binary, -/// or local build is available. -const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; - /// Return the default `ghcr.io/nvidia/openshell/supervisor:` reference /// used when no supervisor binary override is provided. pub fn default_docker_supervisor_image() -> String { format!( - "{DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO}:{}", + "{}:{}", + openshell_core::config::DEFAULT_SUPERVISOR_IMAGE_REPO, default_docker_supervisor_image_tag() ) } -/// Image tag baked in at compile time to pair the gateway with a matching -/// supervisor image. -/// -/// Build pipelines pass `OPENSHELL_IMAGE_TAG` explicitly. The `IMAGE_TAG` -/// fallback covers image build wrappers that already tag the gateway and -/// supervisor together. Standalone release binaries also patch the Cargo -/// package version, so use it when it has been set to a real release value. fn default_docker_supervisor_image_tag() -> String { - resolve_default_docker_supervisor_image_tag( + openshell_core::config::resolve_supervisor_image_tag( option_env!("OPENSHELL_IMAGE_TAG"), option_env!("IMAGE_TAG"), env!("CARGO_PKG_VERSION"), ) } -fn resolve_default_docker_supervisor_image_tag( - openshell_image_tag: Option<&'static str>, - image_tag: Option<&'static str>, - cargo_pkg_version: &'static str, -) -> String { - let tag = openshell_image_tag - .filter(|tag| !tag.is_empty()) - .or_else(|| image_tag.filter(|tag| !tag.is_empty())) - .unwrap_or_else(|| { - if cargo_pkg_version.is_empty() || cargo_pkg_version == "0.0.0" { - "dev" - } else { - cargo_pkg_version - } - }); - - tag.replace('+', "-") -} - /// Queried by the Docker driver to decide when a sandbox's supervisor /// relay is live. Implementations return `true` once a sandbox has an /// active `ConnectSupervisor` session registered. 
diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index 923c6d618..18b574aa1 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ b/crates/openshell-driver-docker/src/tests.rs @@ -1897,36 +1897,31 @@ fn configured_supervisor_image_takes_precedence_over_local_binaries() { #[test] fn docker_supervisor_image_tag_prefers_explicit_build_tags() { + use openshell_core::config::resolve_supervisor_image_tag; assert_eq!( - resolve_default_docker_supervisor_image_tag(Some("1.2.3"), Some("sha"), "0.0.0"), + resolve_supervisor_image_tag(Some("1.2.3"), Some("sha"), "0.0.0"), "1.2.3", ); assert_eq!( - resolve_default_docker_supervisor_image_tag(None, Some("sha"), "0.0.0"), + resolve_supervisor_image_tag(None, Some("sha"), "0.0.0"), "sha", ); + assert_eq!(resolve_supervisor_image_tag(None, None, "1.2.3"), "1.2.3",); assert_eq!( - resolve_default_docker_supervisor_image_tag(None, None, "1.2.3"), - "1.2.3", - ); - assert_eq!( - resolve_default_docker_supervisor_image_tag(Some(""), Some(""), "0.0.0"), + resolve_supervisor_image_tag(Some(""), Some(""), "0.0.0"), "dev", ); } #[test] fn docker_supervisor_image_tag_sanitizes_build_metadata_for_docker() { + use openshell_core::config::resolve_supervisor_image_tag; assert_eq!( - resolve_default_docker_supervisor_image_tag(None, None, "0.0.37-dev.156+g1d3b741ee"), + resolve_supervisor_image_tag(None, None, "0.0.37-dev.156+g1d3b741ee"), "0.0.37-dev.156-g1d3b741ee", ); assert_eq!( - resolve_default_docker_supervisor_image_tag( - Some("0.0.37-dev.156+g1d3b741ee"), - None, - "0.0.0", - ), + resolve_supervisor_image_tag(Some("0.0.37-dev.156+g1d3b741ee"), None, "0.0.0",), "0.0.37-dev.156-g1d3b741ee", ); } diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index d1a5a5814..de1c00890 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -1,7 +1,7 @@ // 
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -use openshell_core::config::DEFAULT_SUPERVISOR_IMAGE; +use openshell_core::config; use serde::{Deserialize, Deserializer, Serialize}; use std::path::Path; use std::str::FromStr; @@ -263,7 +263,7 @@ impl Default for KubernetesComputeConfig { // is Podman vocabulary and is not a valid Kubernetes value. image_pull_policy: String::new(), image_pull_secrets: Vec::new(), - supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), + supervisor_image: config::default_supervisor_image(), supervisor_image_pull_policy: String::new(), supervisor_sideload_method: SupervisorSideloadMethod::default(), supervisor_topology: SupervisorTopology::default(), diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index d755613d6..2f0f175e8 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -121,7 +121,7 @@ async fn main() -> Result<()> { image_pull_secrets: args.sandbox_image_pull_secrets, supervisor_image: args .supervisor_image - .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()), + .unwrap_or_else(openshell_core::config::default_supervisor_image), supervisor_image_pull_policy: args.supervisor_image_pull_policy.unwrap_or_default(), supervisor_sideload_method: args.supervisor_sideload_method, supervisor_topology: args.supervisor_topology, diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 0e29f52dd..def8e5f3d 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -use openshell_core::config::{DEFAULT_STOP_TIMEOUT_SECS, DEFAULT_SUPERVISOR_IMAGE}; +use openshell_core::config::DEFAULT_STOP_TIMEOUT_SECS; use std::net::IpAddr; use std::path::PathBuf; use std::str::FromStr; @@ -255,7 +255,7 @@ impl Default for PodmanComputeConfig { network_name: DEFAULT_NETWORK_NAME.to_string(), host_gateway_ip: Self::default_host_gateway_ip(), stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, - supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), + supervisor_image: openshell_core::config::default_supervisor_image(), guest_tls_ca: None, guest_tls_cert: None, guest_tls_key: None, diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index ba47e4eaa..fc82d830b 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -1798,7 +1798,7 @@ mod tests { let vol = &image_volumes[0]; assert_eq!( vol["source"].as_str(), - Some("ghcr.io/nvidia/openshell/supervisor:latest"), + Some(openshell_core::config::default_supervisor_image().as_str()), "image volume source should be the supervisor image" ); assert_eq!( @@ -1884,8 +1884,9 @@ mod tests { let image_volumes = spec["image_volumes"] .as_array() .expect("image_volumes should be an array"); + let expected_supervisor = openshell_core::config::default_supervisor_image(); assert!(image_volumes.iter().any(|volume| { - volume["source"].as_str() == Some("ghcr.io/nvidia/openshell/supervisor:latest") + volume["source"].as_str() == Some(expected_supervisor.as_str()) && volume["destination"].as_str() == Some("/opt/openshell/bin") })); assert!(image_volumes.iter().any(|volume| { diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index e6ba7b9ff..c57aff427 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -90,7 +90,7 @@ struct Args { /// OCI image containing the 
openshell-sandbox supervisor binary. #[arg(long, env = "OPENSHELL_SUPERVISOR_IMAGE")] - supervisor_image: String, + supervisor_image: Option<String>, /// Host path to the CA certificate for sandbox mTLS. #[arg(long, env = "OPENSHELL_PODMAN_TLS_CA")] @@ -130,7 +130,9 @@ async fn main() -> Result<()> { sandbox_ssh_socket_path: args.sandbox_ssh_socket_path, network_name: args.network_name, stop_timeout_secs: args.stop_timeout, - supervisor_image: args.supervisor_image, + supervisor_image: args + .supervisor_image + .unwrap_or_else(openshell_core::config::default_supervisor_image), guest_tls_ca: args.podman_tls_ca, guest_tls_cert: args.podman_tls_cert, guest_tls_key: args.podman_tls_key, diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index 9fa3a45fc..37c7ec020 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -88,7 +88,8 @@ disable_tls = false # Shared driver defaults. These inherit into [openshell.drivers.] tables # when the driver-specific table does not override them. default_image = "ghcr.io/nvidia/openshell/sandbox:latest" -supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +# Defaults to the gateway version; override to pin a specific build. +# supervisor_image = "ghcr.io/nvidia/openshell/supervisor:" client_tls_secret_name = "openshell-client-tls" service_account_name = "openshell-sandbox" host_gateway_ip = "10.0.0.1" @@ -172,7 +173,8 @@ service_account_name = "openshell-sandbox" default_image = "ghcr.io/nvidia/openshell/sandbox:latest" image_pull_policy = "IfNotPresent" image_pull_secrets = ["regcred"] -supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +# Defaults to the gateway version; override to pin a specific build. 
+# supervisor_image = "ghcr.io/nvidia/openshell/supervisor:" supervisor_image_pull_policy = "IfNotPresent" # Use the image volume on Kubernetes >= 1.35 (GA in 1.36); switch to "init-container" # on older clusters or where the ImageVolume feature gate is off. @@ -222,7 +224,8 @@ grpc_endpoint = "https://host.openshell.internal:17670" # Skip the image-pull-and-extract step by pointing at a locally built binary. supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox" # When supervisor_bin is omitted, Docker extracts /openshell-sandbox from this image. -supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +# Defaults to the gateway version; override to pin a specific build. +# supervisor_image = "ghcr.io/nvidia/openshell/supervisor:" guest_tls_ca = "/etc/openshell/certs/ca.pem" guest_tls_cert = "/etc/openshell/certs/client.pem" guest_tls_key = "/etc/openshell/certs/client-key.pem" @@ -264,7 +267,8 @@ network_name = "openshell" # host_gateway_ip = "192.168.127.254" sandbox_ssh_socket_path = "/run/openshell/ssh.sock" stop_timeout_secs = 10 -supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +# Defaults to the gateway version; override to pin a specific build. +# supervisor_image = "ghcr.io/nvidia/openshell/supervisor:" guest_tls_ca = "/etc/openshell/certs/ca.pem" guest_tls_cert = "/etc/openshell/certs/client.pem" guest_tls_key = "/etc/openshell/certs/client-key.pem"