From 0af74000d249ae2cf8c2a57332969e8a79f647f1 Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Sat, 14 Mar 2026 16:01:54 -0300 Subject: [PATCH] fix: eliminate unsafe code via nix, command-fds, and thread-local cookie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace 4 unsafe sites with safe wrappers: - libc::pipe2 → nix::unistd::pipe2 (safe OwnedFd return) - File::from_raw_fd → File::from(OwnedFd) (safe ownership transfer) - unsafe impl Send for SendCookie → thread_local! lazy Cookie (each thread gets its own independent Cookie, no Send needed) - pre_exec + libc::fcntl → command-fds crate fd_mappings() (handles CLOEXEC clearing safely, also fixes potential fd leak on spawn failure via OwnedFd RAII) Only libc::umask remains as a single unavoidable unsafe site (no safe Rust wrapper exists for the umask syscall). Also updates AGENTS.md to remove stale SendCookie exception. --- AGENTS.md | 2 +- Cargo.lock | 11 +++++ Cargo.toml | 3 +- src/meta_plugin/magic_file.rs | 80 +++++++++++-------------------- src/modes/diff.rs | 89 +++++++++++++++++------------------ 5 files changed, 83 insertions(+), 102 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 93bb2e2..5f2efbe 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,7 +29,7 @@ TERM=dumb cargo build --features server # With server feature - Filter plugins must implement `filter()`, `clone_box()`, and `options()` - Meta plugins extend `BaseMetaPlugin` for boilerplate reduction - Enum string representations: `#[strum(serialize_all = "snake_case")]` -- Lint rules: `deny(clippy::all)`, `deny(unsafe_code)` (except `libc::umask` in main.rs, `unsafe impl Send` in `src/meta_plugin/magic_file.rs` for `SendCookie`) +- Lint rules: `deny(clippy::all)`, `deny(unsafe_code)` (except `libc::umask` in main.rs) - Feature flags: `default = ["magic", "lz4", "gzip"]`; optional: `server`, `swagger` ## Testing diff --git a/Cargo.lock b/Cargo.lock index bd0b243..c223d7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -488,6 +488,16 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "command-fds" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f849b92c694fe237ecd8fafd1ba0df7ae0d45c1df6daeb7f68ed4220d51640bd" +dependencies = [ + "nix", + "thiserror 2.0.18", +] + [[package]] name = "config" version = "0.15.21" @@ -1660,6 +1670,7 @@ dependencies = [ "clap", "clap_complete", "comfy-table", + "command-fds", "config", "ctor", "derive_more", diff --git a/Cargo.toml b/Cargo.toml index 4147de6..f9a29b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ base64 = "0.22" chrono = { version = "0.4", features = ["serde"] } clap = { version = "4.6", features = ["derive", "env"] } clap_complete = "4" +command-fds = "0.3" config = "0.15" ctor = "0.2" directories = "6.0" @@ -39,7 +40,7 @@ local-ip-address = "0.6" log = "0.4" lz4_flex = { version = "0.12", optional = true } magic = { version = "0.13", optional = true } -nix = "0.30" +nix = { version = "0.30", features = ["fs", "process"] } once_cell = "1.21" comfy-table = "7.2" pwhash = "1.0" diff --git a/src/meta_plugin/magic_file.rs b/src/meta_plugin/magic_file.rs index 5a7290d..8e82007 100644 --- a/src/meta_plugin/magic_file.rs +++ b/src/meta_plugin/magic_file.rs @@ -3,7 +3,6 @@ use magic::{Cookie, CookieFlags}; #[cfg(not(feature = "magic"))] use std::process::{Command, Stdio}; -use log::debug; use std::io::{self, Write}; use std::path::Path; @@ -12,27 +11,14 @@ use crate::meta_plugin::{ process_metadata_outputs, }; -/// Wrapper around `magic::Cookie` that is Send. -/// -/// Libmagic cookies are thread-safe per-instance (separate cookies have -/// independent state). The raw pointer `*mut magic_sys::magic_set` does not -/// auto-derive Send, but concurrent access to distinct cookies is safe per -/// the libmagic documentation. +// Thread-local libmagic cookie, lazily initialized on first access per thread. +// Each thread gets its own independent Cookie instance. Libmagic documents that +// separate cookies can be used from different threads concurrently without +// synchronization. Using thread_local! avoids unsafe impl Send since the +// storage is inherently !Send. #[cfg(feature = "magic")] -struct SendCookie(Cookie); - -#[cfg(feature = "magic")] -// SAFETY: Each SendCookie owns a distinct libmagic instance. Libmagic -// documents that separate cookies can be used from different threads -// concurrently without synchronization. -#[allow(unsafe_code)] -unsafe impl Send for SendCookie {} - -#[cfg(feature = "magic")] -impl std::fmt::Debug for SendCookie { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SendCookie").finish() - } +thread_local! { + static MAGIC_COOKIE: std::cell::RefCell> = const { std::cell::RefCell::new(None) }; } #[cfg(feature = "magic")] @@ -41,7 +27,6 @@ pub struct MagicFileMetaPluginImpl { buffer: Vec, max_buffer_size: usize, is_finalized: bool, - cookie: Option, base: BaseMetaPlugin, } @@ -68,14 +53,28 @@ impl MagicFileMetaPluginImpl { buffer: Vec::new(), max_buffer_size, is_finalized: false, - cookie: None, base, } } fn get_magic_result(&self, flags: CookieFlags) -> io::Result { - if let Some(send_cookie) = &self.cookie { - let cookie = &send_cookie.0; + MAGIC_COOKIE.with(|cell| { + // Lazy init: create cookie on first access per thread + { + let mut opt = cell.borrow_mut(); + if opt.is_none() { + let cookie = Cookie::open(CookieFlags::default()) + .map_err(|e| io::Error::other(format!("Failed to open magic: {e}")))?; + cookie.load(&[] as &[&Path]).map_err(|e| { + io::Error::other(format!("Failed to load magic database: {e}")) + })?; + *opt = Some(cookie); + } + } + + let cookie_ref = cell.borrow(); + let cookie = cookie_ref.as_ref().expect("cookie initialized above"); + cookie .set_flags(flags) .map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?; @@ -84,13 +83,8 @@ impl MagicFileMetaPluginImpl { .buffer(&self.buffer) .map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?; - // Clean up the result - remove extra whitespace - let trimmed = result.trim().to_string(); - - Ok(trimmed) - } else { - Err(io::Error::other("Magic cookie not initialized")) - } + Ok(result.trim().to_string()) + }) } fn process_magic_types(&self) -> Vec { @@ -130,27 +124,7 @@ impl MetaPlugin for MagicFileMetaPluginImpl { } fn initialize(&mut self) -> MetaPluginResponse { - let cookie = match Cookie::open(CookieFlags::default()) { - Ok(cookie) => cookie, - Err(e) => { - debug!("META: MagicFile plugin: failed to create cookie: {e}"); - return MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } - }; - - if let Err(e) = cookie.load(&[] as &[&Path]) { - debug!("META: MagicFile plugin: failed to load magic database: {e}"); - return MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } - - self.cookie = Some(SendCookie(cookie)); - + // Cookie is lazily initialized in the thread-local on first use. MetaPluginResponse { metadata: Vec::new(), is_finalized: false, diff --git a/src/modes/diff.rs b/src/modes/diff.rs index c7d7e47..7b19d1f 100644 --- a/src/modes/diff.rs +++ b/src/modes/diff.rs @@ -9,10 +9,12 @@ use crate::services::compression_service::CompressionService; use crate::services::item_service::ItemService; use anyhow::{Context, Result}; use clap::Command; +use command_fds::{CommandFdExt, FdMapping}; use log::debug; +use nix::fcntl::OFlag; +use nix::unistd::pipe2; use std::io::Read; -use std::os::unix::io::{FromRawFd, RawFd}; -use std::os::unix::process::CommandExt; +use std::os::unix::io::{AsRawFd, OwnedFd}; fn validate_diff_args(_cmd: &mut Command, ids: &[i64], tags: &[String]) -> anyhow::Result<()> { if !tags.is_empty() { @@ -88,30 +90,19 @@ pub fn mode_diff( run_external_diff(&item_service, &item_a, &item_b) } -/// Creates a pipe via libc, returns (read_fd, write_fd). -#[allow(unsafe_code)] -fn create_pipe() -> Result<(RawFd, RawFd)> { - let mut fds = [0i32; 2]; - // pipe2 with O_CLOEXEC is atomic — no race between pipe() and fcntl() - let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) }; - if ret != 0 { - return Err(anyhow::anyhow!( - "Failed to create pipe: {}", - std::io::Error::last_os_error() - )); - } - Ok((fds[0], fds[1])) +/// Creates a pipe with CLOEXEC set atomically, returns (read_fd, write_fd). +fn create_pipe() -> Result<(OwnedFd, OwnedFd)> { + pipe2(OFlag::O_CLOEXEC).context("Failed to create pipe") } /// Streams decompressed item content through a pipe fd. /// /// Returns a JoinHandle for the writer thread. The thread writes decompressed /// data to write_fd and closes it when done (causing EOF for the reader). -#[allow(unsafe_code)] fn spawn_writer_thread( item_service: &ItemService, item: &crate::services::types::ItemWithMeta, - write_fd: RawFd, + write_fd: OwnedFd, ) -> std::thread::JoinHandle> { let data_path = item_service.get_data_path().clone(); let item_id = item.item.id.expect("item must have ID"); @@ -125,8 +116,8 @@ fn spawn_writer_thread( .stream_item_content(item_path, &compression) .map_err(|e| anyhow::anyhow!("Failed to stream item {item_id}: {e}"))?; - // Wrap write_fd in a File so it's closed when this scope ends - let mut writer = unsafe { std::fs::File::from_raw_fd(write_fd) }; + // Convert OwnedFd to File — safe, takes ownership, closes on drop + let mut writer = std::fs::File::from(write_fd); let mut buf = [0u8; PIPESIZE]; loop { match reader.read(&mut buf) { @@ -147,8 +138,7 @@ fn spawn_writer_thread( /// /// Creates two pipes, spawns writer threads to decompress each item into its pipe, /// and runs `diff -u /dev/fd/N /dev/fd/M` where N and M are the pipe read fds. -/// The read ends have CLOEXEC cleared in pre_exec so diff can open them. -#[allow(unsafe_code)] +/// The `command-fds` crate handles CLOEXEC clearing safely — no unsafe needed. fn run_external_diff( item_service: &ItemService, item_a: &crate::services::types::ItemWithMeta, @@ -163,35 +153,40 @@ fn run_external_diff( let (read_fd_a, write_fd_a) = create_pipe()?; let (read_fd_b, write_fd_b) = create_pipe()?; - debug!("DIFF: pipe fds: a(r={read_fd_a}, w={write_fd_a}) b(r={read_fd_b}, w={write_fd_b})"); - - // Spawn writer threads before diff — they decompress and write to pipe write ends. - // Threads take ownership of write_fd via from_raw_fd and close them on exit. + // Spawn writer threads — they take ownership of write fds and close them on exit let writer_a = spawn_writer_thread(item_service, item_a, write_fd_a); let writer_b = spawn_writer_thread(item_service, item_b, write_fd_b); - // Spawn diff with /dev/fd/N paths. pre_exec clears CLOEXEC on the pipe read fds - // so they survive the close_fds step in _do_spawn and diff can open them. - let mut child = unsafe { - std::process::Command::new("diff") - .arg("-u") - .arg(format!("/dev/fd/{read_fd_a}")) - .arg(format!("/dev/fd/{read_fd_b}")) - .stdout(std::process::Stdio::inherit()) - .stderr(std::process::Stdio::inherit()) - .stdin(std::process::Stdio::null()) - .pre_exec(move || { - // Clear CLOEXEC on pipe read fds so they survive exec - if libc::fcntl(read_fd_a, libc::F_SETFD, 0) == -1 - || libc::fcntl(read_fd_b, libc::F_SETFD, 0) == -1 - { - return Err(std::io::Error::last_os_error()); - } - Ok(()) - }) - .spawn() - .context("Failed to spawn diff command")? - }; + // Get fd numbers for /dev/fd paths (borrows, does not consume) + let raw_read_a = read_fd_a.as_raw_fd(); + let raw_read_b = read_fd_b.as_raw_fd(); + + debug!("DIFF: pipe fds: a(r={raw_read_a}) b(r={raw_read_b})"); + + // Spawn diff with /dev/fd/N paths. command-fds handles CLOEXEC clearing + // and fd inheritance safely — the fds are released from OwnedFd to the + // child process. If spawn fails, the OwnedFd values in FdMapping are + // dropped and the fds are properly closed. + let mut command = std::process::Command::new("diff"); + command + .arg("-u") + .arg(format!("/dev/fd/{raw_read_a}")) + .arg(format!("/dev/fd/{raw_read_b}")) + .stdout(std::process::Stdio::inherit()) + .stderr(std::process::Stdio::inherit()) + .stdin(std::process::Stdio::null()) + .fd_mappings(vec![ + FdMapping { + parent_fd: read_fd_a, + child_fd: raw_read_a, + }, + FdMapping { + parent_fd: read_fd_b, + child_fd: raw_read_b, + }, + ]) + .map_err(|e| anyhow::anyhow!("FD mapping collision: {e}"))?; + let mut child = command.spawn().context("Failed to spawn diff command")?; let status = child.wait().context("Failed to wait for diff command")?;