diff --git a/.github/workflows/branch-checks.yml b/.github/workflows/branch-checks.yml index 7713febb6..5d5f3fd5f 100644 --- a/.github/workflows/branch-checks.yml +++ b/.github/workflows/branch-checks.yml @@ -172,6 +172,36 @@ jobs: - name: Test run: mise run test:python + rootless-caps: + name: Rootless capability tests + needs: pr_metadata + if: needs.pr_metadata.outputs.should_run == 'true' + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - uses: actions-rust-lang/setup-rust-toolchain@150fca883cd4034361b621bd4e6a9d34e5143606 # v1.15.4 + with: + toolchain: "1.95.0" + cache: false + + - name: Run supervisor capability tests without CAP_SETPCAP + run: | + sudo useradd -m testuser + sudo chmod a+rx /home/runner /home/runner/work /home/runner/work/OpenShell + sudo chmod -R a+rX "$GITHUB_WORKSPACE" + sudo cp -r /home/runner/.rustup /home/testuser/.rustup + sudo chown -R testuser: /home/testuser/.rustup + sudo mkdir -p /home/testuser/.cargo + sudo cp /home/runner/.cargo/config.toml /home/testuser/.cargo/ 2>/dev/null || true + sudo chown -R testuser: /home/testuser/.cargo + sudo -u testuser env \ + PATH="/home/testuser/.cargo/bin:/home/testuser/.rustup/toolchains/1.95.0-x86_64-unknown-linux-gnu/bin:$PATH" \ + CARGO_HOME="/home/testuser/.cargo" \ + RUSTUP_HOME="/home/testuser/.rustup" \ + CARGO_TARGET_DIR="/home/testuser/target" \ + bash -c "cd $GITHUB_WORKSPACE && cargo test -p openshell-supervisor-process --lib -- capability_bounding drop_privileges" + markdown: name: Markdown needs: pr_metadata diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 580d8f96d..c88c1ffbb 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -16,10 +16,14 @@ Each sandbox workload has two trust levels: The supervisor keeps enough privilege to manage the sandbox, but the agent child loses that privilege before user code runs. 
On Linux, child setup clears the capability bounding set during privilege drop so later execs cannot regain -container-granted capabilities. This is fail-closed: the supervisor retains -`CAP_SETPCAP` solely to perform the clear, and spawning the workload or SSH shell -aborts unless the bounding set ends up empty. A `setpcap` `EPERM` is tolerated -only when the set is already empty; any other outcome fails the spawn. +container-granted capabilities. When `CAP_SETPCAP` is available, this is +fail-closed: the supervisor clears the bounding set and aborts if capabilities +remain. When `CAP_SETPCAP` is unavailable (rootless Podman with AppArmor +user-namespace restrictions, or similar environments), the supervisor logs a +warning, emits an OCSF `DetectionFinding` alert, and continues with the +bounding set intact. In this degraded mode the child process relies on seccomp +to block dangerous syscalls; Landlock filesystem restrictions are applied +independently and may also be active. ## Startup Flow diff --git a/crates/openshell-supervisor-process/src/process.rs b/crates/openshell-supervisor-process/src/process.rs index c1b6b4532..3d54c8939 100644 --- a/crates/openshell-supervisor-process/src/process.rs +++ b/crates/openshell-supervisor-process/src/process.rs @@ -27,6 +27,8 @@ use std::process::Stdio; use std::sync::OnceLock; use tokio::process::{Child, Command}; use tracing::debug; +#[cfg(target_os = "linux")] +use tracing::warn; const SUPERVISOR_ONLY_ENV_VARS: &[&str] = &[ openshell_core::sandbox_env::SANDBOX_TOKEN, @@ -189,12 +191,84 @@ fn validate_capability_bounding_set_clear( "Failed to clear unknown child capability bounding set entries: {unknown_err}" )), }, + Err(err) if err.code() == libc::EPERM => { + warn!( + ?remaining, + "CAP_SETPCAP is unavailable and the child capability bounding set is non-empty; \ + the child process relies on seccomp for confinement" + ); + Ok(()) + } Err(err) => Err(miette::miette!( "Failed to clear child capability bounding set: 
{err}" )), } } +/// Probe capability bounding-set availability and emit an OCSF +/// `DetectionFinding` from the parent process when `bounding::clear()` +/// would fail and the bounding set is non-empty. Invoked before +/// `pre_exec`/`fork()` so the event reaches the tracing subscriber; later calls are no-ops. +/// +/// The probe tries a non-destructive `bounding::drop()` on a capability +/// that is already absent from the bounding set. This triggers the same +/// `prctl(PR_CAPBSET_DROP)` syscall that `bounding::clear()` uses, so +/// `AppArmor` restrictions that block the syscall are detected even when +/// `CAP_SETPCAP` is nominally present in the effective set. +#[cfg(target_os = "linux")] +fn log_capability_bounding_set_readiness() { + use std::sync::Once; + static PROBED: Once = Once::new(); + let mut already_probed = true; + PROBED.call_once(|| already_probed = false); + if already_probed { + return; + } + + let bounding = capctl::caps::bounding::probe(); + if bounding.is_empty() { + return; + } + + // Find a capability NOT in the bounding set so that drop() is a no-op + // when the syscall is permitted. If every known capability is raised + // (unusual), skip the probe — clear() will be attempted in the child + // and the warn!() path handles failure there; no OCSF alert is emitted. 
+ let probe_cap = capctl::caps::Cap::iter().find(|cap| !bounding.has(*cap)); + let clear_blocked = probe_cap.is_some_and(|cap| { + capctl::caps::bounding::drop(cap).is_err_and(|e| e.code() == libc::EPERM) + }); + + if !clear_blocked { + return; + } + + openshell_ocsf::ocsf_emit!( + openshell_ocsf::DetectionFindingBuilder::new(openshell_ocsf::ctx::ctx()) + .activity(openshell_ocsf::ActivityId::Open) + .severity(openshell_ocsf::SeverityId::High) + .confidence(openshell_ocsf::ConfidenceId::High) + .is_alert(true) + .finding_info( + openshell_ocsf::FindingInfo::new( + "bounding-set-clear-blocked", + "Capability Bounding Set Clear Blocked", + ) + .with_desc( + "The supervisor cannot clear the child capability bounding set \ + because PR_CAPBSET_DROP returns EPERM. \ + The child process will rely on seccomp for confinement. \ + This is expected in rootless container runtimes with \ + AppArmor user-namespace restrictions.", + ), + ) + .message(format!( + "PR_CAPBSET_DROP blocked, capability bounding set non-empty: {bounding:?}" + )) + .build() + ); +} + // Pins the pre-seccomp child mount namespace where supervisor identity sockets // are shadowed. Children enter it with setns before dropping privileges. #[cfg(target_os = "linux")] @@ -548,11 +622,14 @@ impl ProcessHandle { } } - // Probe Landlock availability and emit OCSF logs from the parent - // process where the tracing subscriber is functional. The child's - // pre_exec context cannot reliably emit structured logs. + // Probe Landlock and capability bounding-set availability and emit + // OCSF logs from the parent process where the tracing subscriber is + // functional. The child's pre_exec context cannot reliably emit + // structured logs. #[cfg(target_os = "linux")] sandbox::linux::log_sandbox_readiness(policy, workdir); + #[cfg(target_os = "linux")] + log_capability_bounding_set_readiness(); // Phase 1 (as root): Prepare Landlock ruleset by opening PathFds. 
// This MUST happen before drop_privileges() so that root-only paths @@ -1150,22 +1227,17 @@ mod tests { #[test] #[cfg(target_os = "linux")] - fn capability_bounding_set_clear_rejects_nonempty_eperm() { + fn capability_bounding_set_clear_tolerates_nonempty_eperm() { let mut remaining = capctl::caps::CapSet::empty(); remaining.add(capctl::caps::Cap::CHOWN); - let result = validate_capability_bounding_set_clear( - Err(capctl::Error::from_code(libc::EPERM)), - remaining, - || panic!("unknown capabilities should not be checked when known caps remain"), - ); - - assert!(result.is_err()); assert!( - result - .unwrap_err() - .to_string() - .contains("Failed to clear child capability bounding set") + validate_capability_bounding_set_clear( + Err(capctl::Error::from_code(libc::EPERM)), + remaining, + || panic!("unknown capabilities should not be checked when known caps remain"), + ) + .is_ok() ); } @@ -1270,21 +1342,7 @@ mod tests { let result = drop_privileges(&policy); - #[cfg(target_os = "linux")] - { - if capability_bounding_set_clear_available() { - assert!(result.is_ok(), "drop_privileges failed: {result:?}"); - } else { - let msg = format!("{}", result.unwrap_err()); - assert!( - msg.contains("Failed to clear child capability bounding set"), - "unexpected failure: {msg}" - ); - } - } - - #[cfg(not(target_os = "linux"))] - assert!(result.is_ok()); + assert!(result.is_ok(), "drop_privileges failed: {result:?}"); } #[test]