Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
310 changes: 310 additions & 0 deletions crates/openshell-bootstrap/src/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,76 @@ fn env_bool(key: &str) -> Option<bool> {
})
}

/// Parse a human-readable memory size string into bytes.
///
/// Accepts plain integers (bytes) or values with `k`/`m`/`g`/`t` suffixes
/// (case-insensitive, with or without a single trailing `b`). The binary
/// spellings `ki`/`mi`/`gi`/`ti` are accepted as synonyms; every unit is a
/// power of 1024. Surrounding whitespace is ignored. Examples: `80g`,
/// `4096m`, `0.5g`, `1gib`, `1073741824`.
///
/// # Errors
///
/// Returns an error if the value is empty, has a missing or malformed numeric
/// part (a leading sign counts as malformed), uses an unknown suffix, does not
/// fit in `i64`, or is below the 4 MiB floor enforced by this function.
pub fn parse_memory_limit(s: &str) -> Result<i64> {
    let s = s.trim().to_ascii_lowercase();
    if s.is_empty() {
        miette::bail!("empty memory limit string");
    }

    // Split at the first character that cannot be part of an unsigned decimal
    // number; everything from there on is the unit.
    let (num_str, unit) = match s.find(|c: char| !c.is_ascii_digit() && c != '.') {
        Some(idx) => s.split_at(idx),
        None => (s.as_str(), ""),
    };
    // Drop at most ONE trailing `b` (`gb` -> `g`, `gib` -> `gi`, `b` -> ``).
    // Stripping every trailing `b` would accept garbage such as `80gbbb`.
    let suffix = unit.strip_suffix('b').unwrap_or(unit);

    let value: f64 = num_str
        .parse()
        .into_diagnostic()
        .wrap_err_with(|| format!("invalid numeric part in memory limit: {num_str}"))?;

    let multiplier: f64 = match suffix {
        "" => 1.0,
        "k" | "ki" => 1024.0,
        "m" | "mi" => 1024.0 * 1024.0,
        "g" | "gi" => 1024.0 * 1024.0 * 1024.0,
        "t" | "ti" => 1024.0 * 1024.0 * 1024.0 * 1024.0,
        other => miette::bail!("unknown memory suffix: {other}"),
    };

    let raw = value * multiplier;
    // `i64::MAX as f64` rounds up to exactly 2^63, which is itself out of
    // range for `i64`. Compare with `>=` so that a value of exactly 2^63 is
    // rejected instead of being silently saturated by the `as` cast below.
    if raw >= i64::MAX as f64 {
        miette::bail!("memory limit too large (exceeds i64::MAX): {s}");
    }
    // In range after the check above; the cast only drops the fractional part.
    #[allow(clippy::cast_possible_truncation)]
    let bytes = raw as i64;

    // Docker requires at least ~6 MiB; enforce a 4 MiB floor so users get a
    // clear error instead of an opaque Docker API rejection.
    const MIN_MEMORY_BYTES: i64 = 4 * 1024 * 1024;
    if bytes < MIN_MEMORY_BYTES {
        miette::bail!("memory limit must be at least 4 MiB, got: {s} ({bytes} bytes)");
    }
    Ok(bytes)
}

/// Derive a default memory limit for the gateway container from the daemon.
///
/// Asks the Docker daemon for its system info and returns 80% of the reported
/// `mem_total`, truncated to whole bytes. On macOS and Windows the daemon runs
/// inside a Linux VM (Docker Desktop, colima, WSL2), so the reported total is
/// the VM's allocated memory rather than the full host RAM.
///
/// Returns `None` when the info request fails, when the daemon reports no
/// memory total, or when the reported total is not positive.
pub async fn detect_memory_limit(docker: &Docker) -> Option<i64> {
    let daemon_info = docker.info().await.ok()?;
    match daemon_info.mem_total {
        Some(total_bytes) if total_bytes > 0 => {
            // Keep 20% headroom below the daemon's total.
            #[allow(clippy::cast_possible_truncation)]
            let capped = (total_bytes as f64 * 0.8) as i64;
            Some(capped)
        }
        _ => None,
    }
}

/// Platform information for a Docker daemon host.
#[derive(Debug, Clone)]
pub struct HostPlatform {
Expand Down Expand Up @@ -236,6 +306,62 @@ fn home_dir() -> Option<String> {
std::env::var("HOME").ok()
}

// Notes on `resolve_upstream_dns` (defined after `parse_resolv_conf` below):
//
// It discovers upstream DNS resolvers from systemd-resolved's configuration.
//
// It only reads `/run/systemd/resolve/resolv.conf` — the upstream resolver
// file maintained by systemd-resolved. This file is only present on Linux
// hosts running systemd-resolved (e.g., Ubuntu), so the function is a no-op on
// macOS, Windows/WSL, and non-systemd Linux distributions.
//
// We intentionally do NOT fall back to `/etc/resolv.conf`. On Docker Desktop
// (macOS/Windows), `/etc/resolv.conf` may contain non-loopback resolvers that
// appear valid but are unreachable via direct UDP from inside the container's
// network stack. Those environments rely on the entrypoint's iptables DNAT
// proxy to Docker's embedded DNS — sniffing host resolvers would bypass that
// proxy and break DNS.
//
// It returns an empty vec if no usable resolvers are found.
/// Parse resolv.conf content, extracting nameserver IPs and filtering loopback addresses.
///
/// A line contributes an entry only when its first whitespace-separated token
/// is exactly `nameserver` and its second token is a valid IPv4/IPv6 address
/// (an IPv6 address may carry a `%zone` suffix). Any further tokens on the
/// line are ignored. Loopback addresses (`127.0.0.0/8`, `::1` in any textual
/// form) and tokens that are not IP addresses are dropped.
///
/// Entries are returned in file order, spelled exactly as they appear in
/// `contents`. Returns an empty vec when nothing usable is found.
fn parse_resolv_conf(contents: &str) -> Vec<String> {
    contents
        .lines()
        .filter_map(|line| {
            let mut fields = line.split_whitespace();
            // Require the exact keyword; a prefix test would also accept
            // unrelated directives such as `nameservers`.
            if fields.next()? != "nameserver" {
                return None;
            }
            let entry = fields.next()?;

            // Classify by parsed address rather than by text so every spelling
            // of loopback is caught and non-address tokens are rejected.
            // `IpAddr` cannot parse a zone id, so validate the part before `%`.
            let is_loopback = match entry.split_once('%') {
                Some((base, _zone)) => base.parse::<std::net::Ipv6Addr>().ok()?.is_loopback(),
                None => entry.parse::<std::net::IpAddr>().ok()?.is_loopback(),
            };
            if is_loopback {
                return None;
            }
            Some(entry.to_string())
        })
        .collect()
}

/// Discover upstream DNS resolvers from the host's systemd-resolved file.
///
/// Reads each candidate file in order (currently only
/// `/run/systemd/resolve/resolv.conf`), parses it with `parse_resolv_conf`,
/// and returns the resolvers from the first file that yields at least one.
/// Unreadable files and files without usable entries are skipped.
///
/// Returns an empty vec if no candidate produced a resolver.
fn resolve_upstream_dns() -> Vec<String> {
    let candidates = ["/run/systemd/resolve/resolv.conf"];

    // First candidate that is readable AND yields a non-empty resolver list.
    let discovered = candidates.iter().find_map(|file| {
        let text = std::fs::read_to_string(file).ok()?;
        let servers = parse_resolv_conf(&text);
        if servers.is_empty() {
            None
        } else {
            Some((file, servers))
        }
    });

    match discovered {
        Some((file, servers)) => {
            tracing::debug!(
                "Discovered {} upstream DNS resolver(s) from {}: {}",
                servers.len(),
                file,
                servers.join(", "),
            );
            servers
        }
        None => {
            tracing::debug!("No upstream DNS resolvers found in host resolver config");
            Vec::new()
        }
    }
}

/// Create an SSH Docker client from remote options.
pub async fn create_ssh_docker_client(remote: &RemoteOptions) -> Result<Docker> {
// Ensure destination has ssh:// prefix
Expand Down Expand Up @@ -455,6 +581,8 @@ pub async fn ensure_container(
registry_username: Option<&str>,
registry_token: Option<&str>,
gpu: bool,
is_remote: bool,
memory_limit: Option<i64>,
) -> Result<()> {
let container_name = container_name(name);

Expand Down Expand Up @@ -559,6 +687,15 @@ pub async fn ensure_container(
}]);
}

// Apply memory limit. When set, Docker OOM-kills the container instead of
// letting unchecked sandbox growth trigger the host kernel OOM killer.
// Setting memory_swap equal to memory disables swap inside the container.
if let Some(mem) = memory_limit {
host_config.memory = Some(mem);
host_config.memory_swap = Some(mem);
tracing::info!("Container memory limit: {} MiB", mem / (1024 * 1024),);
}

let mut cmd = vec![
"server".to_string(),
"--disable=traefik".to_string(),
Expand Down Expand Up @@ -675,6 +812,17 @@ pub async fn ensure_container(
env_vars.push("GPU_ENABLED=true".to_string());
}

// Pass upstream DNS resolvers discovered on the host so the entrypoint
// can configure k3s without probing files inside the container.
// Skip for remote deploys — the local host's resolvers are likely wrong
// for the remote Docker host (different network, split-horizon DNS, etc.).
if !is_remote {
let upstream_dns = resolve_upstream_dns();
if !upstream_dns.is_empty() {
env_vars.push(format!("UPSTREAM_DNS={}", upstream_dns.join(",")));
}
}

let env = Some(env_vars);

let config = ContainerCreateBody {
Expand Down Expand Up @@ -1195,4 +1343,166 @@ mod tests {
"should return a reasonable number of sockets"
);
}

#[test]
fn resolve_upstream_dns_filters_loopback() {
    // Smoke test against the real host: the result depends on the machine's
    // DNS configuration, but whatever comes back must contain no loopback
    // address, and the call itself must not panic.
    resolve_upstream_dns().iter().for_each(|r| {
        assert!(
            !r.starts_with("127."),
            "IPv4 loopback should be filtered: {r}"
        );
        assert_ne!(r, "::1", "IPv6 loopback should be filtered");
    });
}

#[test]
fn resolve_upstream_dns_returns_vec() {
    // The list may legitimately be empty (e.g. CI hosts without a resolv.conf);
    // this only checks that the call completes and the size is sane.
    let count = resolve_upstream_dns().len();
    assert!(
        count <= 20,
        "should return a reasonable number of resolvers"
    );
}

#[test]
fn parse_resolv_conf_filters_ipv4_loopback() {
    // Three different 127.x.x.x entries; none may survive.
    let parsed = parse_resolv_conf(
        "nameserver 127.0.0.1\nnameserver 127.0.0.53\nnameserver 127.0.0.11\n",
    );
    assert!(parsed.is_empty());
}

#[test]
fn parse_resolv_conf_filters_ipv6_loopback() {
    // `::1` is the IPv6 loopback address and must be dropped.
    let parsed = parse_resolv_conf("nameserver ::1\n");
    assert!(parsed.is_empty());
}

#[test]
fn parse_resolv_conf_passes_real_resolvers() {
    // Non-loopback resolvers are returned unchanged and in file order.
    let parsed = parse_resolv_conf("nameserver 8.8.8.8\nnameserver 1.1.1.1\n");
    assert_eq!(parsed, vec!["8.8.8.8", "1.1.1.1"]);
}

#[test]
fn parse_resolv_conf_mixed_loopback_and_real() {
    // Loopback entries are removed; the remaining ones keep their order.
    let parsed = parse_resolv_conf(
        "nameserver 127.0.0.53\nnameserver ::1\nnameserver 10.0.0.1\nnameserver 172.16.0.1\n",
    );
    let expected = vec!["10.0.0.1", "172.16.0.1"];
    assert_eq!(parsed, expected);
}

#[test]
fn parse_resolv_conf_ignores_comments_and_other_lines() {
    // A commented-out nameserver plus `search`/`options` directives must not
    // contribute entries; only the real `nameserver` line counts.
    let parsed = parse_resolv_conf(
        "# nameserver 8.8.8.8\nsearch example.com\noptions ndots:5\nnameserver 1.1.1.1\n",
    );
    assert_eq!(parsed, vec!["1.1.1.1"]);
}

#[test]
fn parse_resolv_conf_handles_tabs_and_extra_spaces() {
    // Cover every separator shape the test name promises: a tab, a run of
    // several spaces, and leading/trailing whitespace around the whole line.
    let input = "nameserver\t8.8.8.8\nnameserver    1.1.1.1\n  \tnameserver 9.9.9.9  \n";
    assert_eq!(
        parse_resolv_conf(input),
        vec!["8.8.8.8", "1.1.1.1", "9.9.9.9"]
    );
}

#[test]
fn parse_resolv_conf_empty_input() {
    // Neither a truly empty string nor whitespace-only content yields entries.
    for blank in ["", " \n\n"] {
        assert!(parse_resolv_conf(blank).is_empty());
    }
}

#[test]
fn parse_resolv_conf_bare_nameserver_keyword() {
    // The keyword without an address (with or without trailing space) is skipped.
    for line in ["nameserver\n", "nameserver \n"] {
        assert!(parse_resolv_conf(line).is_empty());
    }
}

#[test]
fn parse_resolv_conf_systemd_resolved_typical() {
    // Shape of a typical systemd-resolved upstream file: header comment,
    // one LAN resolver, and a search domain.
    let parsed = parse_resolv_conf(
        "# This is /run/systemd/resolve/resolv.conf\nnameserver 192.168.1.1\nsearch lan\n",
    );
    assert_eq!(parsed, vec!["192.168.1.1"]);
}

#[test]
fn parse_resolv_conf_crlf_line_endings() {
    // Windows-style line endings must not leak a `\r` into the addresses.
    let parsed = parse_resolv_conf("nameserver 8.8.8.8\r\nnameserver 1.1.1.1\r\n");
    assert_eq!(parsed, vec!["8.8.8.8", "1.1.1.1"]);
}

#[test]
fn parse_memory_limit_gigabytes() {
    // Letter case and an optional trailing `b` must not change the result.
    let eighty_gib: i64 = 80 * 1024 * 1024 * 1024;
    for input in ["80g", "80G", "80gb"] {
        assert_eq!(parse_memory_limit(input).unwrap(), eighty_gib);
    }
}

#[test]
fn parse_memory_limit_megabytes() {
    // The `m` suffix is a 1024 * 1024 multiplier regardless of case.
    let expected: i64 = 4096 * 1024 * 1024;
    for input in ["4096m", "4096M"] {
        assert_eq!(parse_memory_limit(input).unwrap(), expected);
    }
}

#[test]
fn parse_memory_limit_bare_bytes() {
    // A number without a suffix is taken as a byte count (here: 1 GiB).
    let parsed = parse_memory_limit("1073741824").unwrap();
    assert_eq!(parsed, 1073741824);
}

#[test]
fn parse_memory_limit_binary_suffixes() {
    // `gi` and `gib` are synonyms for `g`.
    let one_gib: i64 = 1024 * 1024 * 1024;
    for input in ["1gi", "1gib"] {
        assert_eq!(parse_memory_limit(input).unwrap(), one_gib);
    }
}

#[test]
fn parse_memory_limit_rejects_empty() {
    // An empty string carries no value and must be an error.
    let outcome = parse_memory_limit("");
    assert!(outcome.is_err());
}

#[test]
fn parse_memory_limit_rejects_unknown_suffix() {
    // `x` is not a recognised unit.
    let outcome = parse_memory_limit("10x");
    assert!(outcome.is_err());
}

#[test]
fn parse_memory_limit_fractional() {
    // Fractional values scale with the unit: 0.5g = 512 MiB.
    let half_gib: i64 = 512 * 1024 * 1024;
    assert_eq!(parse_memory_limit("0.5g").unwrap(), half_gib);
}

#[test]
fn parse_memory_limit_rejects_zero() {
    // Zero is below the minimum limit and must be rejected.
    let outcome = parse_memory_limit("0g");
    assert!(outcome.is_err());
}

#[test]
fn parse_memory_limit_rejects_negative() {
    // A leading minus sign is not a valid memory limit.
    let outcome = parse_memory_limit("-1g");
    assert!(outcome.is_err());
}

#[test]
fn parse_memory_limit_rejects_below_minimum() {
    // 1 KiB is well below the 4 MiB floor.
    let outcome = parse_memory_limit("1k");
    assert!(outcome.is_err());
}

#[test]
fn parse_memory_limit_rejects_overflow() {
    // 99999999t exceeds i64::MAX (~9.2 exabytes) and must not wrap or saturate.
    let outcome = parse_memory_limit("99999999t");
    assert!(outcome.is_err());
}

#[test]
fn parse_memory_limit_whitespace() {
    // Leading and trailing whitespace around the value is ignored.
    let parsed = parse_memory_limit(" 80g ").unwrap();
    let eighty_gib: i64 = 80 * 1024 * 1024 * 1024;
    assert_eq!(parsed, eighty_gib);
}

// detect_memory_limit is async and requires a Docker daemon connection,
// so it is tested via integration / e2e tests rather than unit tests.
}
Loading
Loading