fix: eliminate unsafe code via nix, command-fds, and thread-local cookie

Replace 4 unsafe sites with safe wrappers:

- libc::pipe2 → nix::unistd::pipe2 (safe OwnedFd return)
- File::from_raw_fd → File::from(OwnedFd) (safe ownership transfer)
- unsafe impl Send for SendCookie → thread_local! lazy Cookie
  (each thread gets its own independent Cookie, no Send needed)
- pre_exec + libc::fcntl → command-fds crate fd_mappings()
  (handles CLOEXEC clearing safely, also fixes potential fd leak
  on spawn failure via OwnedFd RAII)

Only libc::umask remains as a single unavoidable unsafe site
(no safe Rust wrapper exists for the umask syscall).

Also updates AGENTS.md to remove stale SendCookie exception.
This commit is contained in:
2026-03-14 16:01:54 -03:00
parent 9a1e23e85f
commit 0af74000d2
5 changed files with 83 additions and 102 deletions

View File

@@ -29,7 +29,7 @@ TERM=dumb cargo build --features server # With server feature
- Filter plugins must implement `filter()`, `clone_box()`, and `options()`
- Meta plugins extend `BaseMetaPlugin` for boilerplate reduction
- Enum string representations: `#[strum(serialize_all = "snake_case")]`
- Lint rules: `deny(clippy::all)`, `deny(unsafe_code)` (except `libc::umask` in main.rs, `unsafe impl Send` in `src/meta_plugin/magic_file.rs` for `SendCookie`)
- Lint rules: `deny(clippy::all)`, `deny(unsafe_code)` (except `libc::umask` in main.rs)
- Feature flags: `default = ["magic", "lz4", "gzip"]`; optional: `server`, `swagger`
## Testing

11
Cargo.lock generated
View File

@@ -488,6 +488,16 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "command-fds"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f849b92c694fe237ecd8fafd1ba0df7ae0d45c1df6daeb7f68ed4220d51640bd"
dependencies = [
"nix",
"thiserror 2.0.18",
]
[[package]]
name = "config"
version = "0.15.21"
@@ -1660,6 +1670,7 @@ dependencies = [
"clap",
"clap_complete",
"comfy-table",
"command-fds",
"config",
"ctor",
"derive_more",

View File

@@ -19,6 +19,7 @@ base64 = "0.22"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.6", features = ["derive", "env"] }
clap_complete = "4"
command-fds = "0.3"
config = "0.15"
ctor = "0.2"
directories = "6.0"
@@ -39,7 +40,7 @@ local-ip-address = "0.6"
log = "0.4"
lz4_flex = { version = "0.12", optional = true }
magic = { version = "0.13", optional = true }
nix = "0.30"
nix = { version = "0.30", features = ["fs", "process"] }
once_cell = "1.21"
comfy-table = "7.2"
pwhash = "1.0"

View File

@@ -3,7 +3,6 @@ use magic::{Cookie, CookieFlags};
#[cfg(not(feature = "magic"))]
use std::process::{Command, Stdio};
use log::debug;
use std::io::{self, Write};
use std::path::Path;
@@ -12,27 +11,14 @@ use crate::meta_plugin::{
process_metadata_outputs,
};
/// Wrapper around `magic::Cookie` that is Send.
///
/// Libmagic cookies are thread-safe per-instance (separate cookies have
/// independent state). The raw pointer `*mut magic_sys::magic_set` does not
/// auto-derive Send, but concurrent access to distinct cookies is safe per
/// the libmagic documentation.
// Thread-local libmagic cookie, lazily initialized on first access per thread.
// Each thread gets its own independent Cookie instance. Libmagic documents that
// separate cookies can be used from different threads concurrently without
// synchronization. Using thread_local! avoids unsafe impl Send since the
// storage is inherently !Send.
#[cfg(feature = "magic")]
struct SendCookie(Cookie);
#[cfg(feature = "magic")]
// SAFETY: Each SendCookie owns a distinct libmagic instance. Libmagic
// documents that separate cookies can be used from different threads
// concurrently without synchronization.
#[allow(unsafe_code)]
unsafe impl Send for SendCookie {}
#[cfg(feature = "magic")]
impl std::fmt::Debug for SendCookie {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SendCookie").finish()
}
thread_local! {
static MAGIC_COOKIE: std::cell::RefCell<Option<Cookie>> = const { std::cell::RefCell::new(None) };
}
#[cfg(feature = "magic")]
@@ -41,7 +27,6 @@ pub struct MagicFileMetaPluginImpl {
buffer: Vec<u8>,
max_buffer_size: usize,
is_finalized: bool,
cookie: Option<SendCookie>,
base: BaseMetaPlugin,
}
@@ -68,14 +53,28 @@ impl MagicFileMetaPluginImpl {
buffer: Vec::new(),
max_buffer_size,
is_finalized: false,
cookie: None,
base,
}
}
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
if let Some(send_cookie) = &self.cookie {
let cookie = &send_cookie.0;
MAGIC_COOKIE.with(|cell| {
// Lazy init: create cookie on first access per thread
{
let mut opt = cell.borrow_mut();
if opt.is_none() {
let cookie = Cookie::open(CookieFlags::default())
.map_err(|e| io::Error::other(format!("Failed to open magic: {e}")))?;
cookie.load(&[] as &[&Path]).map_err(|e| {
io::Error::other(format!("Failed to load magic database: {e}"))
})?;
*opt = Some(cookie);
}
}
let cookie_ref = cell.borrow();
let cookie = cookie_ref.as_ref().expect("cookie initialized above");
cookie
.set_flags(flags)
.map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;
@@ -84,13 +83,8 @@ impl MagicFileMetaPluginImpl {
.buffer(&self.buffer)
.map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;
// Clean up the result - remove extra whitespace
let trimmed = result.trim().to_string();
Ok(trimmed)
} else {
Err(io::Error::other("Magic cookie not initialized"))
}
Ok(result.trim().to_string())
})
}
fn process_magic_types(&self) -> Vec<MetaData> {
@@ -130,27 +124,7 @@ impl MetaPlugin for MagicFileMetaPluginImpl {
}
fn initialize(&mut self) -> MetaPluginResponse {
let cookie = match Cookie::open(CookieFlags::default()) {
Ok(cookie) => cookie,
Err(e) => {
debug!("META: MagicFile plugin: failed to create cookie: {e}");
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
};
if let Err(e) = cookie.load(&[] as &[&Path]) {
debug!("META: MagicFile plugin: failed to load magic database: {e}");
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
self.cookie = Some(SendCookie(cookie));
// Cookie is lazily initialized in the thread-local on first use.
MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false,

View File

@@ -9,10 +9,12 @@ use crate::services::compression_service::CompressionService;
use crate::services::item_service::ItemService;
use anyhow::{Context, Result};
use clap::Command;
use command_fds::{CommandFdExt, FdMapping};
use log::debug;
use nix::fcntl::OFlag;
use nix::unistd::pipe2;
use std::io::Read;
use std::os::unix::io::{FromRawFd, RawFd};
use std::os::unix::process::CommandExt;
use std::os::unix::io::{AsRawFd, OwnedFd};
fn validate_diff_args(_cmd: &mut Command, ids: &[i64], tags: &[String]) -> anyhow::Result<()> {
if !tags.is_empty() {
@@ -88,30 +90,19 @@ pub fn mode_diff(
run_external_diff(&item_service, &item_a, &item_b)
}
/// Creates a pipe via libc, returns (read_fd, write_fd).
#[allow(unsafe_code)]
fn create_pipe() -> Result<(RawFd, RawFd)> {
let mut fds = [0i32; 2];
// pipe2 with O_CLOEXEC is atomic — no race between pipe() and fcntl()
let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
if ret != 0 {
return Err(anyhow::anyhow!(
"Failed to create pipe: {}",
std::io::Error::last_os_error()
));
}
Ok((fds[0], fds[1]))
/// Creates a pipe with CLOEXEC set atomically, returns (read_fd, write_fd).
fn create_pipe() -> Result<(OwnedFd, OwnedFd)> {
pipe2(OFlag::O_CLOEXEC).context("Failed to create pipe")
}
/// Streams decompressed item content through a pipe fd.
///
/// Returns a JoinHandle for the writer thread. The thread writes decompressed
/// data to write_fd and closes it when done (causing EOF for the reader).
#[allow(unsafe_code)]
fn spawn_writer_thread(
item_service: &ItemService,
item: &crate::services::types::ItemWithMeta,
write_fd: RawFd,
write_fd: OwnedFd,
) -> std::thread::JoinHandle<Result<()>> {
let data_path = item_service.get_data_path().clone();
let item_id = item.item.id.expect("item must have ID");
@@ -125,8 +116,8 @@ fn spawn_writer_thread(
.stream_item_content(item_path, &compression)
.map_err(|e| anyhow::anyhow!("Failed to stream item {item_id}: {e}"))?;
// Wrap write_fd in a File so it's closed when this scope ends
let mut writer = unsafe { std::fs::File::from_raw_fd(write_fd) };
// Convert OwnedFd to File — safe, takes ownership, closes on drop
let mut writer = std::fs::File::from(write_fd);
let mut buf = [0u8; PIPESIZE];
loop {
match reader.read(&mut buf) {
@@ -147,8 +138,7 @@ fn spawn_writer_thread(
///
/// Creates two pipes, spawns writer threads to decompress each item into its pipe,
/// and runs `diff -u /dev/fd/N /dev/fd/M` where N and M are the pipe read fds.
/// The read ends have CLOEXEC cleared in pre_exec so diff can open them.
#[allow(unsafe_code)]
/// The `command-fds` crate handles CLOEXEC clearing safely — no unsafe needed.
fn run_external_diff(
item_service: &ItemService,
item_a: &crate::services::types::ItemWithMeta,
@@ -163,35 +153,40 @@ fn run_external_diff(
let (read_fd_a, write_fd_a) = create_pipe()?;
let (read_fd_b, write_fd_b) = create_pipe()?;
debug!("DIFF: pipe fds: a(r={read_fd_a}, w={write_fd_a}) b(r={read_fd_b}, w={write_fd_b})");
// Spawn writer threads before diff — they decompress and write to pipe write ends.
// Threads take ownership of write_fd via from_raw_fd and close them on exit.
// Spawn writer threads — they take ownership of write fds and close them on exit
let writer_a = spawn_writer_thread(item_service, item_a, write_fd_a);
let writer_b = spawn_writer_thread(item_service, item_b, write_fd_b);
// Spawn diff with /dev/fd/N paths. pre_exec clears CLOEXEC on the pipe read fds
// so they survive the close_fds step in _do_spawn and diff can open them.
let mut child = unsafe {
std::process::Command::new("diff")
.arg("-u")
.arg(format!("/dev/fd/{read_fd_a}"))
.arg(format!("/dev/fd/{read_fd_b}"))
.stdout(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit())
.stdin(std::process::Stdio::null())
.pre_exec(move || {
// Clear CLOEXEC on pipe read fds so they survive exec
if libc::fcntl(read_fd_a, libc::F_SETFD, 0) == -1
|| libc::fcntl(read_fd_b, libc::F_SETFD, 0) == -1
{
return Err(std::io::Error::last_os_error());
}
Ok(())
})
.spawn()
.context("Failed to spawn diff command")?
};
// Get fd numbers for /dev/fd paths (borrows, does not consume)
let raw_read_a = read_fd_a.as_raw_fd();
let raw_read_b = read_fd_b.as_raw_fd();
debug!("DIFF: pipe fds: a(r={raw_read_a}) b(r={raw_read_b})");
// Spawn diff with /dev/fd/N paths. command-fds handles CLOEXEC clearing
// and fd inheritance safely — the fds are released from OwnedFd to the
// child process. If spawn fails, the OwnedFd values in FdMapping are
// dropped and the fds are properly closed.
let mut command = std::process::Command::new("diff");
command
.arg("-u")
.arg(format!("/dev/fd/{raw_read_a}"))
.arg(format!("/dev/fd/{raw_read_b}"))
.stdout(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit())
.stdin(std::process::Stdio::null())
.fd_mappings(vec![
FdMapping {
parent_fd: read_fd_a,
child_fd: raw_read_a,
},
FdMapping {
parent_fd: read_fd_b,
child_fd: raw_read_b,
},
])
.map_err(|e| anyhow::anyhow!("FD mapping collision: {e}"))?;
let mut child = command.spawn().context("Failed to spawn diff command")?;
let status = child.wait().context("Failed to wait for diff command")?;