fix: eliminate unsafe code via nix, command-fds, and thread-local cookie

Replace 4 unsafe sites with safe wrappers:

- libc::pipe2 → nix::unistd::pipe2 (safe OwnedFd return)
- File::from_raw_fd → File::from(OwnedFd) (safe ownership transfer)
- unsafe impl Send for SendCookie → thread_local! lazy Cookie
  (each thread gets its own independent Cookie, no Send needed)
- pre_exec + libc::fcntl → command-fds crate fd_mappings()
  (handles CLOEXEC clearing safely, also fixes potential fd leak
  on spawn failure via OwnedFd RAII)

Only libc::umask remains as a single unavoidable unsafe site
(no safe Rust wrapper exists for the umask syscall).

Also updates AGENTS.md to remove stale SendCookie exception.
This commit is contained in:
2026-03-14 16:01:54 -03:00
parent 9a1e23e85f
commit 0af74000d2
5 changed files with 83 additions and 102 deletions

View File

@@ -29,7 +29,7 @@ TERM=dumb cargo build --features server # With server feature
- Filter plugins must implement `filter()`, `clone_box()`, and `options()` - Filter plugins must implement `filter()`, `clone_box()`, and `options()`
- Meta plugins extend `BaseMetaPlugin` for boilerplate reduction - Meta plugins extend `BaseMetaPlugin` for boilerplate reduction
- Enum string representations: `#[strum(serialize_all = "snake_case")]` - Enum string representations: `#[strum(serialize_all = "snake_case")]`
- Lint rules: `deny(clippy::all)`, `deny(unsafe_code)` (except `libc::umask` in main.rs, `unsafe impl Send` in `src/meta_plugin/magic_file.rs` for `SendCookie`) - Lint rules: `deny(clippy::all)`, `deny(unsafe_code)` (except `libc::umask` in main.rs)
- Feature flags: `default = ["magic", "lz4", "gzip"]`; optional: `server`, `swagger` - Feature flags: `default = ["magic", "lz4", "gzip"]`; optional: `server`, `swagger`
## Testing ## Testing

11
Cargo.lock generated
View File

@@ -488,6 +488,16 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "command-fds"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f849b92c694fe237ecd8fafd1ba0df7ae0d45c1df6daeb7f68ed4220d51640bd"
dependencies = [
"nix",
"thiserror 2.0.18",
]
[[package]] [[package]]
name = "config" name = "config"
version = "0.15.21" version = "0.15.21"
@@ -1660,6 +1670,7 @@ dependencies = [
"clap", "clap",
"clap_complete", "clap_complete",
"comfy-table", "comfy-table",
"command-fds",
"config", "config",
"ctor", "ctor",
"derive_more", "derive_more",

View File

@@ -19,6 +19,7 @@ base64 = "0.22"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.6", features = ["derive", "env"] } clap = { version = "4.6", features = ["derive", "env"] }
clap_complete = "4" clap_complete = "4"
command-fds = "0.3"
config = "0.15" config = "0.15"
ctor = "0.2" ctor = "0.2"
directories = "6.0" directories = "6.0"
@@ -39,7 +40,7 @@ local-ip-address = "0.6"
log = "0.4" log = "0.4"
lz4_flex = { version = "0.12", optional = true } lz4_flex = { version = "0.12", optional = true }
magic = { version = "0.13", optional = true } magic = { version = "0.13", optional = true }
nix = "0.30" nix = { version = "0.30", features = ["fs", "process"] }
once_cell = "1.21" once_cell = "1.21"
comfy-table = "7.2" comfy-table = "7.2"
pwhash = "1.0" pwhash = "1.0"

View File

@@ -3,7 +3,6 @@ use magic::{Cookie, CookieFlags};
#[cfg(not(feature = "magic"))] #[cfg(not(feature = "magic"))]
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
use log::debug;
use std::io::{self, Write}; use std::io::{self, Write};
use std::path::Path; use std::path::Path;
@@ -12,27 +11,14 @@ use crate::meta_plugin::{
process_metadata_outputs, process_metadata_outputs,
}; };
/// Wrapper around `magic::Cookie` that is Send. // Thread-local libmagic cookie, lazily initialized on first access per thread.
/// // Each thread gets its own independent Cookie instance. Libmagic documents that
/// Libmagic cookies are thread-safe per-instance (separate cookies have // separate cookies can be used from different threads concurrently without
/// independent state). The raw pointer `*mut magic_sys::magic_set` does not // synchronization. Using thread_local! avoids unsafe impl Send since the
/// auto-derive Send, but concurrent access to distinct cookies is safe per // storage is inherently !Send.
/// the libmagic documentation.
#[cfg(feature = "magic")] #[cfg(feature = "magic")]
struct SendCookie(Cookie); thread_local! {
static MAGIC_COOKIE: std::cell::RefCell<Option<Cookie>> = const { std::cell::RefCell::new(None) };
#[cfg(feature = "magic")]
// SAFETY: Each SendCookie owns a distinct libmagic instance. Libmagic
// documents that separate cookies can be used from different threads
// concurrently without synchronization.
#[allow(unsafe_code)]
unsafe impl Send for SendCookie {}
#[cfg(feature = "magic")]
impl std::fmt::Debug for SendCookie {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SendCookie").finish()
}
} }
#[cfg(feature = "magic")] #[cfg(feature = "magic")]
@@ -41,7 +27,6 @@ pub struct MagicFileMetaPluginImpl {
buffer: Vec<u8>, buffer: Vec<u8>,
max_buffer_size: usize, max_buffer_size: usize,
is_finalized: bool, is_finalized: bool,
cookie: Option<SendCookie>,
base: BaseMetaPlugin, base: BaseMetaPlugin,
} }
@@ -68,14 +53,28 @@ impl MagicFileMetaPluginImpl {
buffer: Vec::new(), buffer: Vec::new(),
max_buffer_size, max_buffer_size,
is_finalized: false, is_finalized: false,
cookie: None,
base, base,
} }
} }
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> { fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
if let Some(send_cookie) = &self.cookie { MAGIC_COOKIE.with(|cell| {
let cookie = &send_cookie.0; // Lazy init: create cookie on first access per thread
{
let mut opt = cell.borrow_mut();
if opt.is_none() {
let cookie = Cookie::open(CookieFlags::default())
.map_err(|e| io::Error::other(format!("Failed to open magic: {e}")))?;
cookie.load(&[] as &[&Path]).map_err(|e| {
io::Error::other(format!("Failed to load magic database: {e}"))
})?;
*opt = Some(cookie);
}
}
let cookie_ref = cell.borrow();
let cookie = cookie_ref.as_ref().expect("cookie initialized above");
cookie cookie
.set_flags(flags) .set_flags(flags)
.map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?; .map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;
@@ -84,13 +83,8 @@ impl MagicFileMetaPluginImpl {
.buffer(&self.buffer) .buffer(&self.buffer)
.map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?; .map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;
// Clean up the result - remove extra whitespace Ok(result.trim().to_string())
let trimmed = result.trim().to_string(); })
Ok(trimmed)
} else {
Err(io::Error::other("Magic cookie not initialized"))
}
} }
fn process_magic_types(&self) -> Vec<MetaData> { fn process_magic_types(&self) -> Vec<MetaData> {
@@ -130,27 +124,7 @@ impl MetaPlugin for MagicFileMetaPluginImpl {
} }
fn initialize(&mut self) -> MetaPluginResponse { fn initialize(&mut self) -> MetaPluginResponse {
let cookie = match Cookie::open(CookieFlags::default()) { // Cookie is lazily initialized in the thread-local on first use.
Ok(cookie) => cookie,
Err(e) => {
debug!("META: MagicFile plugin: failed to create cookie: {e}");
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
};
if let Err(e) = cookie.load(&[] as &[&Path]) {
debug!("META: MagicFile plugin: failed to load magic database: {e}");
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
self.cookie = Some(SendCookie(cookie));
MetaPluginResponse { MetaPluginResponse {
metadata: Vec::new(), metadata: Vec::new(),
is_finalized: false, is_finalized: false,

View File

@@ -9,10 +9,12 @@ use crate::services::compression_service::CompressionService;
use crate::services::item_service::ItemService; use crate::services::item_service::ItemService;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use clap::Command; use clap::Command;
use command_fds::{CommandFdExt, FdMapping};
use log::debug; use log::debug;
use nix::fcntl::OFlag;
use nix::unistd::pipe2;
use std::io::Read; use std::io::Read;
use std::os::unix::io::{FromRawFd, RawFd}; use std::os::unix::io::{AsRawFd, OwnedFd};
use std::os::unix::process::CommandExt;
fn validate_diff_args(_cmd: &mut Command, ids: &[i64], tags: &[String]) -> anyhow::Result<()> { fn validate_diff_args(_cmd: &mut Command, ids: &[i64], tags: &[String]) -> anyhow::Result<()> {
if !tags.is_empty() { if !tags.is_empty() {
@@ -88,30 +90,19 @@ pub fn mode_diff(
run_external_diff(&item_service, &item_a, &item_b) run_external_diff(&item_service, &item_a, &item_b)
} }
/// Creates a pipe via libc, returns (read_fd, write_fd). /// Creates a pipe with CLOEXEC set atomically, returns (read_fd, write_fd).
#[allow(unsafe_code)] fn create_pipe() -> Result<(OwnedFd, OwnedFd)> {
fn create_pipe() -> Result<(RawFd, RawFd)> { pipe2(OFlag::O_CLOEXEC).context("Failed to create pipe")
let mut fds = [0i32; 2];
// pipe2 with O_CLOEXEC is atomic — no race between pipe() and fcntl()
let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
if ret != 0 {
return Err(anyhow::anyhow!(
"Failed to create pipe: {}",
std::io::Error::last_os_error()
));
}
Ok((fds[0], fds[1]))
} }
/// Streams decompressed item content through a pipe fd. /// Streams decompressed item content through a pipe fd.
/// ///
/// Returns a JoinHandle for the writer thread. The thread writes decompressed /// Returns a JoinHandle for the writer thread. The thread writes decompressed
/// data to write_fd and closes it when done (causing EOF for the reader). /// data to write_fd and closes it when done (causing EOF for the reader).
#[allow(unsafe_code)]
fn spawn_writer_thread( fn spawn_writer_thread(
item_service: &ItemService, item_service: &ItemService,
item: &crate::services::types::ItemWithMeta, item: &crate::services::types::ItemWithMeta,
write_fd: RawFd, write_fd: OwnedFd,
) -> std::thread::JoinHandle<Result<()>> { ) -> std::thread::JoinHandle<Result<()>> {
let data_path = item_service.get_data_path().clone(); let data_path = item_service.get_data_path().clone();
let item_id = item.item.id.expect("item must have ID"); let item_id = item.item.id.expect("item must have ID");
@@ -125,8 +116,8 @@ fn spawn_writer_thread(
.stream_item_content(item_path, &compression) .stream_item_content(item_path, &compression)
.map_err(|e| anyhow::anyhow!("Failed to stream item {item_id}: {e}"))?; .map_err(|e| anyhow::anyhow!("Failed to stream item {item_id}: {e}"))?;
// Wrap write_fd in a File so it's closed when this scope ends // Convert OwnedFd to File — safe, takes ownership, closes on drop
let mut writer = unsafe { std::fs::File::from_raw_fd(write_fd) }; let mut writer = std::fs::File::from(write_fd);
let mut buf = [0u8; PIPESIZE]; let mut buf = [0u8; PIPESIZE];
loop { loop {
match reader.read(&mut buf) { match reader.read(&mut buf) {
@@ -147,8 +138,7 @@ fn spawn_writer_thread(
/// ///
/// Creates two pipes, spawns writer threads to decompress each item into its pipe, /// Creates two pipes, spawns writer threads to decompress each item into its pipe,
/// and runs `diff -u /dev/fd/N /dev/fd/M` where N and M are the pipe read fds. /// and runs `diff -u /dev/fd/N /dev/fd/M` where N and M are the pipe read fds.
/// The read ends have CLOEXEC cleared in pre_exec so diff can open them. /// The `command-fds` crate handles CLOEXEC clearing safely — no unsafe needed.
#[allow(unsafe_code)]
fn run_external_diff( fn run_external_diff(
item_service: &ItemService, item_service: &ItemService,
item_a: &crate::services::types::ItemWithMeta, item_a: &crate::services::types::ItemWithMeta,
@@ -163,35 +153,40 @@ fn run_external_diff(
let (read_fd_a, write_fd_a) = create_pipe()?; let (read_fd_a, write_fd_a) = create_pipe()?;
let (read_fd_b, write_fd_b) = create_pipe()?; let (read_fd_b, write_fd_b) = create_pipe()?;
debug!("DIFF: pipe fds: a(r={read_fd_a}, w={write_fd_a}) b(r={read_fd_b}, w={write_fd_b})"); // Spawn writer threads — they take ownership of write fds and close them on exit
// Spawn writer threads before diff — they decompress and write to pipe write ends.
// Threads take ownership of write_fd via from_raw_fd and close them on exit.
let writer_a = spawn_writer_thread(item_service, item_a, write_fd_a); let writer_a = spawn_writer_thread(item_service, item_a, write_fd_a);
let writer_b = spawn_writer_thread(item_service, item_b, write_fd_b); let writer_b = spawn_writer_thread(item_service, item_b, write_fd_b);
// Spawn diff with /dev/fd/N paths. pre_exec clears CLOEXEC on the pipe read fds // Get fd numbers for /dev/fd paths (borrows, does not consume)
// so they survive the close_fds step in _do_spawn and diff can open them. let raw_read_a = read_fd_a.as_raw_fd();
let mut child = unsafe { let raw_read_b = read_fd_b.as_raw_fd();
std::process::Command::new("diff")
debug!("DIFF: pipe fds: a(r={raw_read_a}) b(r={raw_read_b})");
// Spawn diff with /dev/fd/N paths. command-fds handles CLOEXEC clearing
// and fd inheritance safely — the fds are released from OwnedFd to the
// child process. If spawn fails, the OwnedFd values in FdMapping are
// dropped and the fds are properly closed.
let mut command = std::process::Command::new("diff");
command
.arg("-u") .arg("-u")
.arg(format!("/dev/fd/{read_fd_a}")) .arg(format!("/dev/fd/{raw_read_a}"))
.arg(format!("/dev/fd/{read_fd_b}")) .arg(format!("/dev/fd/{raw_read_b}"))
.stdout(std::process::Stdio::inherit()) .stdout(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit()) .stderr(std::process::Stdio::inherit())
.stdin(std::process::Stdio::null()) .stdin(std::process::Stdio::null())
.pre_exec(move || { .fd_mappings(vec![
// Clear CLOEXEC on pipe read fds so they survive exec FdMapping {
if libc::fcntl(read_fd_a, libc::F_SETFD, 0) == -1 parent_fd: read_fd_a,
|| libc::fcntl(read_fd_b, libc::F_SETFD, 0) == -1 child_fd: raw_read_a,
{ },
return Err(std::io::Error::last_os_error()); FdMapping {
} parent_fd: read_fd_b,
Ok(()) child_fd: raw_read_b,
}) },
.spawn() ])
.context("Failed to spawn diff command")? .map_err(|e| anyhow::anyhow!("FD mapping collision: {e}"))?;
}; let mut child = command.spawn().context("Failed to spawn diff command")?;
let status = child.wait().context("Failed to wait for diff command")?; let status = child.wait().context("Failed to wait for diff command")?;