fix: eliminate unsafe code via nix, command-fds, and thread-local cookie
Replace 4 unsafe sites with safe wrappers: - libc::pipe2 → nix::unistd::pipe2 (safe OwnedFd return) - File::from_raw_fd → File::from(OwnedFd) (safe ownership transfer) - unsafe impl Send for SendCookie → thread_local! lazy Cookie (each thread gets its own independent Cookie, no Send needed) - pre_exec + libc::fcntl → command-fds crate fd_mappings() (handles CLOEXEC clearing safely, also fixes potential fd leak on spawn failure via OwnedFd RAII) Only libc::umask remains as a single unavoidable unsafe site (no safe Rust wrapper exists for the umask syscall). Also updates AGENTS.md to remove stale SendCookie exception.
This commit is contained in:
@@ -3,7 +3,6 @@ use magic::{Cookie, CookieFlags};
|
||||
#[cfg(not(feature = "magic"))]
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
use log::debug;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
|
||||
@@ -12,27 +11,14 @@ use crate::meta_plugin::{
|
||||
process_metadata_outputs,
|
||||
};
|
||||
|
||||
/// Wrapper around `magic::Cookie` that is Send.
|
||||
///
|
||||
/// Libmagic cookies are thread-safe per-instance (separate cookies have
|
||||
/// independent state). The raw pointer `*mut magic_sys::magic_set` does not
|
||||
/// auto-derive Send, but concurrent access to distinct cookies is safe per
|
||||
/// the libmagic documentation.
|
||||
// Thread-local libmagic cookie, lazily initialized on first access per thread.
|
||||
// Each thread gets its own independent Cookie instance. Libmagic documents that
|
||||
// separate cookies can be used from different threads concurrently without
|
||||
// synchronization. Using thread_local! avoids unsafe impl Send since the
|
||||
// storage is inherently !Send.
|
||||
#[cfg(feature = "magic")]
|
||||
struct SendCookie(Cookie);
|
||||
|
||||
#[cfg(feature = "magic")]
|
||||
// SAFETY: Each SendCookie owns a distinct libmagic instance. Libmagic
|
||||
// documents that separate cookies can be used from different threads
|
||||
// concurrently without synchronization.
|
||||
#[allow(unsafe_code)]
|
||||
unsafe impl Send for SendCookie {}
|
||||
|
||||
#[cfg(feature = "magic")]
|
||||
impl std::fmt::Debug for SendCookie {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SendCookie").finish()
|
||||
}
|
||||
thread_local! {
|
||||
static MAGIC_COOKIE: std::cell::RefCell<Option<Cookie>> = const { std::cell::RefCell::new(None) };
|
||||
}
|
||||
|
||||
#[cfg(feature = "magic")]
|
||||
@@ -41,7 +27,6 @@ pub struct MagicFileMetaPluginImpl {
|
||||
buffer: Vec<u8>,
|
||||
max_buffer_size: usize,
|
||||
is_finalized: bool,
|
||||
cookie: Option<SendCookie>,
|
||||
base: BaseMetaPlugin,
|
||||
}
|
||||
|
||||
@@ -68,14 +53,28 @@ impl MagicFileMetaPluginImpl {
|
||||
buffer: Vec::new(),
|
||||
max_buffer_size,
|
||||
is_finalized: false,
|
||||
cookie: None,
|
||||
base,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
|
||||
if let Some(send_cookie) = &self.cookie {
|
||||
let cookie = &send_cookie.0;
|
||||
MAGIC_COOKIE.with(|cell| {
|
||||
// Lazy init: create cookie on first access per thread
|
||||
{
|
||||
let mut opt = cell.borrow_mut();
|
||||
if opt.is_none() {
|
||||
let cookie = Cookie::open(CookieFlags::default())
|
||||
.map_err(|e| io::Error::other(format!("Failed to open magic: {e}")))?;
|
||||
cookie.load(&[] as &[&Path]).map_err(|e| {
|
||||
io::Error::other(format!("Failed to load magic database: {e}"))
|
||||
})?;
|
||||
*opt = Some(cookie);
|
||||
}
|
||||
}
|
||||
|
||||
let cookie_ref = cell.borrow();
|
||||
let cookie = cookie_ref.as_ref().expect("cookie initialized above");
|
||||
|
||||
cookie
|
||||
.set_flags(flags)
|
||||
.map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;
|
||||
@@ -84,13 +83,8 @@ impl MagicFileMetaPluginImpl {
|
||||
.buffer(&self.buffer)
|
||||
.map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;
|
||||
|
||||
// Clean up the result - remove extra whitespace
|
||||
let trimmed = result.trim().to_string();
|
||||
|
||||
Ok(trimmed)
|
||||
} else {
|
||||
Err(io::Error::other("Magic cookie not initialized"))
|
||||
}
|
||||
Ok(result.trim().to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn process_magic_types(&self) -> Vec<MetaData> {
|
||||
@@ -130,27 +124,7 @@ impl MetaPlugin for MagicFileMetaPluginImpl {
|
||||
}
|
||||
|
||||
fn initialize(&mut self) -> MetaPluginResponse {
|
||||
let cookie = match Cookie::open(CookieFlags::default()) {
|
||||
Ok(cookie) => cookie,
|
||||
Err(e) => {
|
||||
debug!("META: MagicFile plugin: failed to create cookie: {e}");
|
||||
return MetaPluginResponse {
|
||||
metadata: Vec::new(),
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = cookie.load(&[] as &[&Path]) {
|
||||
debug!("META: MagicFile plugin: failed to load magic database: {e}");
|
||||
return MetaPluginResponse {
|
||||
metadata: Vec::new(),
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
|
||||
self.cookie = Some(SendCookie(cookie));
|
||||
|
||||
// Cookie is lazily initialized in the thread-local on first use.
|
||||
MetaPluginResponse {
|
||||
metadata: Vec::new(),
|
||||
is_finalized: false,
|
||||
|
||||
@@ -9,10 +9,12 @@ use crate::services::compression_service::CompressionService;
|
||||
use crate::services::item_service::ItemService;
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Command;
|
||||
use command_fds::{CommandFdExt, FdMapping};
|
||||
use log::debug;
|
||||
use nix::fcntl::OFlag;
|
||||
use nix::unistd::pipe2;
|
||||
use std::io::Read;
|
||||
use std::os::unix::io::{FromRawFd, RawFd};
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::os::unix::io::{AsRawFd, OwnedFd};
|
||||
|
||||
fn validate_diff_args(_cmd: &mut Command, ids: &[i64], tags: &[String]) -> anyhow::Result<()> {
|
||||
if !tags.is_empty() {
|
||||
@@ -88,30 +90,19 @@ pub fn mode_diff(
|
||||
run_external_diff(&item_service, &item_a, &item_b)
|
||||
}
|
||||
|
||||
/// Creates a pipe via libc, returns (read_fd, write_fd).
|
||||
#[allow(unsafe_code)]
|
||||
fn create_pipe() -> Result<(RawFd, RawFd)> {
|
||||
let mut fds = [0i32; 2];
|
||||
// pipe2 with O_CLOEXEC is atomic — no race between pipe() and fcntl()
|
||||
let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
|
||||
if ret != 0 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Failed to create pipe: {}",
|
||||
std::io::Error::last_os_error()
|
||||
));
|
||||
}
|
||||
Ok((fds[0], fds[1]))
|
||||
/// Creates a pipe with CLOEXEC set atomically, returns (read_fd, write_fd).
|
||||
fn create_pipe() -> Result<(OwnedFd, OwnedFd)> {
|
||||
pipe2(OFlag::O_CLOEXEC).context("Failed to create pipe")
|
||||
}
|
||||
|
||||
/// Streams decompressed item content through a pipe fd.
|
||||
///
|
||||
/// Returns a JoinHandle for the writer thread. The thread writes decompressed
|
||||
/// data to write_fd and closes it when done (causing EOF for the reader).
|
||||
#[allow(unsafe_code)]
|
||||
fn spawn_writer_thread(
|
||||
item_service: &ItemService,
|
||||
item: &crate::services::types::ItemWithMeta,
|
||||
write_fd: RawFd,
|
||||
write_fd: OwnedFd,
|
||||
) -> std::thread::JoinHandle<Result<()>> {
|
||||
let data_path = item_service.get_data_path().clone();
|
||||
let item_id = item.item.id.expect("item must have ID");
|
||||
@@ -125,8 +116,8 @@ fn spawn_writer_thread(
|
||||
.stream_item_content(item_path, &compression)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to stream item {item_id}: {e}"))?;
|
||||
|
||||
// Wrap write_fd in a File so it's closed when this scope ends
|
||||
let mut writer = unsafe { std::fs::File::from_raw_fd(write_fd) };
|
||||
// Convert OwnedFd to File — safe, takes ownership, closes on drop
|
||||
let mut writer = std::fs::File::from(write_fd);
|
||||
let mut buf = [0u8; PIPESIZE];
|
||||
loop {
|
||||
match reader.read(&mut buf) {
|
||||
@@ -147,8 +138,7 @@ fn spawn_writer_thread(
|
||||
///
|
||||
/// Creates two pipes, spawns writer threads to decompress each item into its pipe,
|
||||
/// and runs `diff -u /dev/fd/N /dev/fd/M` where N and M are the pipe read fds.
|
||||
/// The read ends have CLOEXEC cleared in pre_exec so diff can open them.
|
||||
#[allow(unsafe_code)]
|
||||
/// The `command-fds` crate handles CLOEXEC clearing safely — no unsafe needed.
|
||||
fn run_external_diff(
|
||||
item_service: &ItemService,
|
||||
item_a: &crate::services::types::ItemWithMeta,
|
||||
@@ -163,35 +153,40 @@ fn run_external_diff(
|
||||
let (read_fd_a, write_fd_a) = create_pipe()?;
|
||||
let (read_fd_b, write_fd_b) = create_pipe()?;
|
||||
|
||||
debug!("DIFF: pipe fds: a(r={read_fd_a}, w={write_fd_a}) b(r={read_fd_b}, w={write_fd_b})");
|
||||
|
||||
// Spawn writer threads before diff — they decompress and write to pipe write ends.
|
||||
// Threads take ownership of write_fd via from_raw_fd and close them on exit.
|
||||
// Spawn writer threads — they take ownership of write fds and close them on exit
|
||||
let writer_a = spawn_writer_thread(item_service, item_a, write_fd_a);
|
||||
let writer_b = spawn_writer_thread(item_service, item_b, write_fd_b);
|
||||
|
||||
// Spawn diff with /dev/fd/N paths. pre_exec clears CLOEXEC on the pipe read fds
|
||||
// so they survive the close_fds step in _do_spawn and diff can open them.
|
||||
let mut child = unsafe {
|
||||
std::process::Command::new("diff")
|
||||
.arg("-u")
|
||||
.arg(format!("/dev/fd/{read_fd_a}"))
|
||||
.arg(format!("/dev/fd/{read_fd_b}"))
|
||||
.stdout(std::process::Stdio::inherit())
|
||||
.stderr(std::process::Stdio::inherit())
|
||||
.stdin(std::process::Stdio::null())
|
||||
.pre_exec(move || {
|
||||
// Clear CLOEXEC on pipe read fds so they survive exec
|
||||
if libc::fcntl(read_fd_a, libc::F_SETFD, 0) == -1
|
||||
|| libc::fcntl(read_fd_b, libc::F_SETFD, 0) == -1
|
||||
{
|
||||
return Err(std::io::Error::last_os_error());
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.spawn()
|
||||
.context("Failed to spawn diff command")?
|
||||
};
|
||||
// Get fd numbers for /dev/fd paths (borrows, does not consume)
|
||||
let raw_read_a = read_fd_a.as_raw_fd();
|
||||
let raw_read_b = read_fd_b.as_raw_fd();
|
||||
|
||||
debug!("DIFF: pipe fds: a(r={raw_read_a}) b(r={raw_read_b})");
|
||||
|
||||
// Spawn diff with /dev/fd/N paths. command-fds handles CLOEXEC clearing
|
||||
// and fd inheritance safely — the fds are released from OwnedFd to the
|
||||
// child process. If spawn fails, the OwnedFd values in FdMapping are
|
||||
// dropped and the fds are properly closed.
|
||||
let mut command = std::process::Command::new("diff");
|
||||
command
|
||||
.arg("-u")
|
||||
.arg(format!("/dev/fd/{raw_read_a}"))
|
||||
.arg(format!("/dev/fd/{raw_read_b}"))
|
||||
.stdout(std::process::Stdio::inherit())
|
||||
.stderr(std::process::Stdio::inherit())
|
||||
.stdin(std::process::Stdio::null())
|
||||
.fd_mappings(vec![
|
||||
FdMapping {
|
||||
parent_fd: read_fd_a,
|
||||
child_fd: raw_read_a,
|
||||
},
|
||||
FdMapping {
|
||||
parent_fd: read_fd_b,
|
||||
child_fd: raw_read_b,
|
||||
},
|
||||
])
|
||||
.map_err(|e| anyhow::anyhow!("FD mapping collision: {e}"))?;
|
||||
let mut child = command.spawn().context("Failed to spawn diff command")?;
|
||||
|
||||
let status = child.wait().context("Failed to wait for diff command")?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user