Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/branch-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,36 @@ jobs:
- name: Test
run: mise run test:python

# Runs a filtered subset of the openshell-supervisor-process library tests as a
# freshly created non-root user (`testuser`); the test step below is named
# "without CAP_SETPCAP".
rootless-caps:
name: Rootless capability tests
needs: pr_metadata
# Skipped unless the pr_metadata job reports should_run == 'true'.
if: needs.pr_metadata.outputs.should_run == 'true'
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0

# The toolchain version is pinned here; the test step below hard-codes the
# matching 1.95.0-x86_64-unknown-linux-gnu toolchain directory in PATH, so the
# two must be updated together.
- uses: actions-rust-lang/setup-rust-toolchain@150fca883cd4034361b621bd4e6a9d34e5143606 # v1.15.4
with:
toolchain: "1.95.0"
cache: false

# Creates `testuser`, opens the runner's work directories and the workspace for
# reading, copies the runner's rustup directory (and cargo config, if present)
# into the new user's home, then runs the `capability_bounding` and
# `drop_privileges` test filters as that user with its own CARGO_HOME,
# RUSTUP_HOME and CARGO_TARGET_DIR.
- name: Run supervisor capability tests without CAP_SETPCAP
run: |
sudo useradd -m testuser
sudo chmod a+rx /home/runner /home/runner/work /home/runner/work/OpenShell
sudo chmod -R a+rX "$GITHUB_WORKSPACE"
sudo cp -r /home/runner/.rustup /home/testuser/.rustup
sudo chown -R testuser: /home/testuser/.rustup
sudo mkdir -p /home/testuser/.cargo
sudo cp /home/runner/.cargo/config.toml /home/testuser/.cargo/ 2>/dev/null || true
sudo chown -R testuser: /home/testuser/.cargo
sudo -u testuser env \
PATH="/home/testuser/.cargo/bin:/home/testuser/.rustup/toolchains/1.95.0-x86_64-unknown-linux-gnu/bin:$PATH" \
CARGO_HOME="/home/testuser/.cargo" \
RUSTUP_HOME="/home/testuser/.rustup" \
CARGO_TARGET_DIR="/home/testuser/target" \
bash -c "cd $GITHUB_WORKSPACE && cargo test -p openshell-supervisor-process --lib -- capability_bounding drop_privileges"
markdown:
name: Markdown
needs: pr_metadata
Expand Down
12 changes: 8 additions & 4 deletions architecture/sandbox.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ Each sandbox workload has two trust levels:
The supervisor keeps enough privilege to manage the sandbox, but the agent child
loses that privilege before user code runs. On Linux, child setup clears the
capability bounding set during privilege drop so later execs cannot regain
container-granted capabilities. This is fail-closed: the supervisor retains
`CAP_SETPCAP` solely to perform the clear, and spawning the workload or SSH shell
aborts unless the bounding set ends up empty. A `setpcap` `EPERM` is tolerated
only when the set is already empty; any other outcome fails the spawn.
container-granted capabilities. When `CAP_SETPCAP` is available, this is
fail-closed: the supervisor clears the bounding set and aborts if capabilities
remain. When `CAP_SETPCAP` is unavailable (rootless Podman with AppArmor
user-namespace restrictions, or similar environments), the supervisor logs a
warning, emits an OCSF `DetectionFinding` alert, and continues with the
bounding set intact. In this degraded mode the child process relies on seccomp
to block dangerous syscalls; Landlock filesystem restrictions are applied
independently and may also be active.

## Startup Flow

Expand Down
118 changes: 88 additions & 30 deletions crates/openshell-supervisor-process/src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use std::process::Stdio;
use std::sync::OnceLock;
use tokio::process::{Child, Command};
use tracing::debug;
#[cfg(target_os = "linux")]
use tracing::warn;

const SUPERVISOR_ONLY_ENV_VARS: &[&str] = &[
openshell_core::sandbox_env::SANDBOX_TOKEN,
Expand Down Expand Up @@ -189,12 +191,84 @@ fn validate_capability_bounding_set_clear(
"Failed to clear unknown child capability bounding set entries: {unknown_err}"
)),
},
Err(err) if err.code() == libc::EPERM => {
warn!(
?remaining,
"CAP_SETPCAP is unavailable and the child capability bounding set is non-empty; \
the child process relies on seccomp for confinement"
);
Ok(())
}
Err(err) => Err(miette::miette!(
"Failed to clear child capability bounding set: {err}"
)),
}
}

/// Probe capability bounding-set availability and emit an OCSF
/// `DetectionFinding` from the parent process when `bounding::clear()`
/// would fail and the bounding set is non-empty. Called once before
/// `pre_exec`/`fork()` so the event reaches the tracing subscriber.
///
/// The probe tries a non-destructive `bounding::drop()` on a capability
/// that is already absent from the bounding set. This triggers the same
/// `prctl(PR_CAPBSET_DROP)` syscall that `bounding::clear()` uses, so
/// AppArmor restrictions that block the syscall are detected even when
/// `CAP_SETPCAP` is nominally present in the effective set.
#[cfg(target_os = "linux")]
fn log_capability_bounding_set_readiness() {
use std::sync::Once;
static PROBED: Once = Once::new();
let mut already_probed = true;
PROBED.call_once(|| already_probed = false);
if already_probed {
return;
}

let bounding = capctl::caps::bounding::probe();
if bounding.is_empty() {
return;
}

// Find a capability NOT in the bounding set so that drop() is a no-op
// when the syscall is permitted. If every known capability is raised
// (unusual), skip the probe — clear() will be attempted in the child
// and the warn!() path handles failure there.
let probe_cap = capctl::caps::Cap::iter().find(|cap| !bounding.has(*cap));
let clear_blocked = probe_cap.is_some_and(|cap| {
capctl::caps::bounding::drop(cap).is_err_and(|e| e.code() == libc::EPERM)
});

if !clear_blocked {
return;
}

openshell_ocsf::ocsf_emit!(
openshell_ocsf::DetectionFindingBuilder::new(openshell_ocsf::ctx::ctx())
.activity(openshell_ocsf::ActivityId::Open)
.severity(openshell_ocsf::SeverityId::High)
.confidence(openshell_ocsf::ConfidenceId::High)
.is_alert(true)
.finding_info(
openshell_ocsf::FindingInfo::new(
"bounding-set-clear-blocked",
"Capability Bounding Set Clear Blocked",
)
.with_desc(
"The supervisor cannot clear the child capability bounding set \
because PR_CAPBSET_DROP returns EPERM. \
The child process will rely on seccomp for confinement. \
This is expected in rootless container runtimes with \
AppArmor user-namespace restrictions.",
),
)
.message(format!(
"PR_CAPBSET_DROP blocked, capability bounding set non-empty: {bounding:?}"
))
.build()
);
}

// Pins the pre-seccomp child mount namespace where supervisor identity sockets
// are shadowed. Children enter it with setns before dropping privileges.
#[cfg(target_os = "linux")]
Expand Down Expand Up @@ -548,11 +622,14 @@ impl ProcessHandle {
}
}

// Probe Landlock availability and emit OCSF logs from the parent
// process where the tracing subscriber is functional. The child's
// pre_exec context cannot reliably emit structured logs.
// Probe Landlock and capability bounding-set availability and emit
// OCSF logs from the parent process where the tracing subscriber is
// functional. The child's pre_exec context cannot reliably emit
// structured logs.
#[cfg(target_os = "linux")]
sandbox::linux::log_sandbox_readiness(policy, workdir);
#[cfg(target_os = "linux")]
log_capability_bounding_set_readiness();

// Phase 1 (as root): Prepare Landlock ruleset by opening PathFds.
// This MUST happen before drop_privileges() so that root-only paths
Expand Down Expand Up @@ -1150,22 +1227,17 @@ mod tests {

#[test]
#[cfg(target_os = "linux")]
fn capability_bounding_set_clear_rejects_nonempty_eperm() {
fn capability_bounding_set_clear_tolerates_nonempty_eperm() {
let mut remaining = capctl::caps::CapSet::empty();
remaining.add(capctl::caps::Cap::CHOWN);

let result = validate_capability_bounding_set_clear(
Err(capctl::Error::from_code(libc::EPERM)),
remaining,
|| panic!("unknown capabilities should not be checked when known caps remain"),
);

assert!(result.is_err());
assert!(
result
.unwrap_err()
.to_string()
.contains("Failed to clear child capability bounding set")
validate_capability_bounding_set_clear(
Err(capctl::Error::from_code(libc::EPERM)),
remaining,
|| panic!("unknown capabilities should not be checked when known caps remain"),
)
.is_ok()
);
}

Expand Down Expand Up @@ -1270,21 +1342,7 @@ mod tests {

let result = drop_privileges(&policy);

#[cfg(target_os = "linux")]
{
if capability_bounding_set_clear_available() {
assert!(result.is_ok(), "drop_privileges failed: {result:?}");
} else {
let msg = format!("{}", result.unwrap_err());
assert!(
msg.contains("Failed to clear child capability bounding set"),
"unexpected failure: {msg}"
);
}
}

#[cfg(not(target_os = "linux"))]
assert!(result.is_ok());
assert!(result.is_ok(), "drop_privileges failed: {result:?}");
}

#[test]
Expand Down
Loading