use anyhow::{anyhow, Result}; use clap::Command; use std::io::Read; use std::os::fd::FromRawFd; use std::str::FromStr; fn validate_diff_args(cmd: &mut Command, ids: &Vec, tags: &Vec) { if !tags.is_empty() { cmd.error( clap::error::ErrorKind::InvalidValue, "Tags are not supported with --diff. Please provide exactly two IDs.", ) .exit(); } if ids.len() != 2 { cmd.error( clap::error::ErrorKind::InvalidValue, "You must supply exactly two IDs when using --diff.", ) .exit(); } } fn fetch_and_validate_items( conn: &mut rusqlite::Connection, ids: &Vec, ) -> Result<(crate::db::Item, crate::db::Item), anyhow::Error> { // Fetch items, ensuring they exist. let item_a = crate::db::get_item(conn, ids[0])? .ok_or_else(|| anyhow::anyhow!("Unable to find first item (ID: {}) in database", ids[0]))?; let item_b = crate::db::get_item(conn, ids[1])? .ok_or_else(|| anyhow::anyhow!("Unable to find second item (ID: {}) in database", ids[1]))?; log::debug!("MAIN: Found item A {:?}", item_a); log::debug!("MAIN: Found item B {:?}", item_b); let item_a_id = item_a.id.ok_or_else(|| anyhow!("Item A missing ID"))?; let item_b_id = item_b.id.ok_or_else(|| anyhow!("Item B missing ID"))?; // Validate that item IDs are positive to prevent path traversal issues if item_a_id <= 0 || item_b_id <= 0 { return Err(anyhow::anyhow!("Invalid item ID: {} or {}", item_a_id, item_b_id)); } Ok((item_a, item_b)) } fn get_item_tags(conn: &mut rusqlite::Connection, item: &crate::db::Item) -> Result, anyhow::Error> { let tags: Vec = crate::db::get_item_tags(conn, item)? .into_iter() .map(|x| x.name) .collect(); Ok(tags) } fn setup_diff_paths_and_compression( data_path: &std::path::PathBuf, item_a: &crate::db::Item, item_b: &crate::db::Item, ) -> Result<(std::path::PathBuf, crate::compression_engine::CompressionType, std::path::PathBuf, crate::compression_engine::CompressionType), anyhow::Error> { let item_a_id = item_a.id.ok_or_else(|| anyhow::anyhow!("Item A missing ID"))?; let item_b_id = item_b.id.ok_or_else(|| anyhow::anyhow!("Item B missing ID"))?; let mut item_path_a = data_path.clone(); item_path_a.push(item_a_id.to_string()); let compression_type_a = crate::compression_engine::CompressionType::from_str(&item_a.compression)?; log::debug!("MAIN: Item A has compression type {:?}", compression_type_a); let mut item_path_b = data_path.clone(); item_path_b.push(item_b_id.to_string()); let compression_type_b = crate::compression_engine::CompressionType::from_str(&item_b.compression)?; log::debug!("MAIN: Item B has compression type {:?}", compression_type_b); Ok((item_path_a, compression_type_a, item_path_b, compression_type_b)) } fn setup_diff_pipes() -> Result<((libc::c_int, libc::c_int), (libc::c_int, libc::c_int)), anyhow::Error> { use nix::unistd::pipe; use nix::Error as NixError; use std::os::fd::IntoRawFd; // Create pipes for diff's input let (fd_a_read, fd_a_write) = pipe() .map_err(|e: NixError| anyhow::anyhow!("Failed to create pipe A: {}", e))?; let (fd_b_read, fd_b_write) = pipe() .map_err(|e: NixError| anyhow::anyhow!("Failed to create pipe B: {}", e))?; Ok(((fd_a_read.into_raw_fd(), fd_a_write.into_raw_fd()), (fd_b_read.into_raw_fd(), fd_b_write.into_raw_fd()))) } fn setup_fd_guards(fd_a_read: libc::c_int, fd_b_read: libc::c_int) -> (FdGuard, FdGuard) { // Wrap file descriptors in RAII guards let fd_a_read_guard = FdGuard::new(fd_a_read); let fd_b_read_guard = FdGuard::new(fd_b_read); (fd_a_read_guard, fd_b_read_guard) } fn spawn_diff_process( item_a_id: i64, item_a_tags: Vec, item_b_id: i64, item_b_tags: Vec, fd_a_read: libc::c_int, fd_b_read: libc::c_int, ) -> Result { log::debug!("MAIN: Creating child process for diff"); let mut diff_command = std::process::Command::new("diff"); diff_command .arg("-u") .arg("--label") .arg(format!( "Keep item A: {} {}", item_a_id, item_a_tags.join(" ") )) .arg(format!("/dev/fd/{}", fd_a_read)) .arg("--label") .arg(format!( "Keep item B: {} {}", item_b_id, item_b_tags.join(" ") )) .arg(format!("/dev/fd/{}", fd_b_read)) .stdin(std::process::Stdio::null()) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()); let child_process = diff_command .spawn() .map_err(|e| anyhow::anyhow!("Failed to execute diff command: {}", e))?; Ok(child_process) } // RAII guard for file descriptors to ensure they're closed struct FdGuard { fd: libc::c_int, } impl FdGuard { fn new(fd: libc::c_int) -> Self { Self { fd } } } impl Drop for FdGuard { fn drop(&mut self) { let _ = nix::unistd::close(self.fd); } } // Create a function to write item data to a pipe fn write_item_to_pipe( item_path: std::path::PathBuf, compression_type: crate::compression_engine::CompressionType, pipe_writer_raw: std::fs::File, ) -> Result<(), anyhow::Error> { use std::io::BufWriter; let mut buffered_pipe_writer = BufWriter::new(pipe_writer_raw); let engine = crate::compression_engine::get_compression_engine(compression_type).expect("Unable to get compression engine"); log::debug!("THREAD: Sending item to diff"); engine .copy(item_path, &mut buffered_pipe_writer) .map_err(|e| anyhow::anyhow!("Failed to copy/compress item: {}", e))?; log::debug!("THREAD: Done sending item to diff"); Ok(()) } // Function to spawn a writer thread for an item fn spawn_writer_thread( item_path: std::path::PathBuf, compression_type: crate::compression_engine::CompressionType, fd_write: libc::c_int, ) -> std::thread::JoinHandle> { let pipe_writer_raw = unsafe { std::fs::File::from_raw_fd(fd_write) }; std::thread::spawn(move || { write_item_to_pipe(item_path, compression_type, pipe_writer_raw) }) } fn execute_diff_command( child_process: &mut std::process::Child, ) -> Result<(Vec, Vec), anyhow::Error> { let mut child_stdout_pipe = child_process .stdout .take() .expect("BUG: Failed to capture diff stdout pipe"); let mut child_stderr_pipe = child_process .stderr .take() .expect("BUG: Failed to capture diff stderr pipe"); log::debug!("MAIN: Creating threads for diff I/O"); // Thread to read diff's standard output let stdout_reader_thread = std::thread::spawn(move || { let mut output_buffer = Vec::new(); log::debug!("STDOUT_READER: Reading diff stdout"); // child_stdout_pipe is a ChildStdout, which implements std::io::Read child_stdout_pipe .read_to_end(&mut output_buffer) .map_err(|e| anyhow::anyhow!("Failed to read diff stdout: {}", e)) .map(|_| output_buffer) // Return the Vec on success }); // Thread to read diff's standard error let stderr_reader_thread = std::thread::spawn(move || { let mut error_buffer = Vec::new(); log::debug!("STDERR_READER: Reading diff stderr"); child_stderr_pipe .read_to_end(&mut error_buffer) .map_err(|e| anyhow::anyhow!("Failed to read diff stderr: {}", e)) .map(|_| error_buffer) }); // Retrieve the captured output from the reader threads. let stdout_capture_result = stdout_reader_thread .join() .map_err(|panic_payload| { anyhow::anyhow!("Stdout reader thread panicked: {:?}", panic_payload) })? .map_err(|e| anyhow::anyhow!("Failed to read diff stdout: {}", e))?; let stderr_capture_result = stderr_reader_thread .join() .map_err(|panic_payload| { anyhow::anyhow!("Stderr reader thread panicked: {:?}", panic_payload) })? .map_err(|e| anyhow::anyhow!("Failed to read diff stderr: {}", e))?; Ok((stdout_capture_result, stderr_capture_result)) } fn handle_diff_output( diff_status: std::process::ExitStatus, stdout_capture_result: Vec, stderr_capture_result: Vec, ) -> Result<(), anyhow::Error> { // Handle diff's exit status and output match diff_status.code() { Some(0) => { // Exit code 0: No differences log::debug!("MAIN: Diff successful, no differences found."); // Typically, diff -u doesn't print to stdout if no differences. // But if it did, it would be shown here. if !stdout_capture_result.is_empty() { println!("{}", String::from_utf8_lossy(&stdout_capture_result)); } } Some(1) => { // Exit code 1: Differences found log::debug!("MAIN: Diff successful, differences found."); println!("{}", String::from_utf8_lossy(&stdout_capture_result)); } Some(error_code) => { // Exit code > 1: Error in diff utility eprintln!("Diff command failed with exit code: {}", error_code); if !stdout_capture_result.is_empty() { eprintln!( "Diff stdout before error:\n{}", String::from_utf8_lossy(&stdout_capture_result) ); } if !stderr_capture_result.is_empty() { eprintln!( "Diff stderr:\n{}", String::from_utf8_lossy(&stderr_capture_result) ); } return Err(anyhow::anyhow!( "Diff command reported an error (exit code {})", error_code )); } None => { // Process terminated by a signal eprintln!("Diff command terminated by signal."); if !stderr_capture_result.is_empty() { eprintln!( "Diff stderr before signal termination:\n{}", String::from_utf8_lossy(&stderr_capture_result) ); } return Err(anyhow::anyhow!("Diff command terminated by signal")); } } Ok(()) } pub fn mode_diff( cmd: &mut Command, _settings: &crate::config::Settings, _config: &crate::config::Config, ids: &mut Vec, tags: &mut Vec, conn: &mut rusqlite::Connection, data_path: std::path::PathBuf, ) -> Result<(), anyhow::Error> { validate_diff_args(cmd, ids, tags); let (item_a, item_b) = fetch_and_validate_items(conn, ids)?; let item_a_tags = get_item_tags(conn, &item_a)?; let item_b_tags = get_item_tags(conn, &item_b)?; let (item_path_a, compression_type_a, item_path_b, compression_type_b) = setup_diff_paths_and_compression(&data_path, &item_a, &item_b)?; let ((fd_a_read, fd_a_write), (fd_b_read, fd_b_write)) = setup_diff_pipes()?; let (_fd_a_read_guard, _fd_b_read_guard) = setup_fd_guards(fd_a_read, fd_b_read); let item_a_id = item_a.id.ok_or_else(|| anyhow::anyhow!("Item A missing ID"))?; let item_b_id = item_b.id.ok_or_else(|| anyhow::anyhow!("Item B missing ID"))?; let mut child_process = spawn_diff_process( item_a_id, item_a_tags, item_b_id, item_b_tags, fd_a_read, fd_b_read, )?; // Close read ends in parent process - they're now guarded by FdGuard drop(_fd_a_read_guard); drop(_fd_b_read_guard); // Spawn writer threads for both items let writer_thread_a = spawn_writer_thread(item_path_a.clone(), compression_type_a.clone(), fd_a_write); let writer_thread_b = spawn_writer_thread(item_path_b.clone(), compression_type_b.clone(), fd_b_write); // Wait for writer threads to complete (meaning all input has been sent to diff) log::debug!("MAIN: Waiting on writer thread for item A"); match writer_thread_a.join() { Ok(Ok(())) => { log::debug!("MAIN: Writer thread for item A completed successfully."); } Ok(Err(e)) => { return Err(anyhow::anyhow!("Writer thread for item A failed: {}", e)); } Err(panic_payload) => { return Err(anyhow::anyhow!( "Writer thread for item A (ID: {}) panicked: {:?}", ids[0], panic_payload )); } } log::debug!("MAIN: Waiting on writer thread for item B"); match writer_thread_b.join() { Ok(Ok(())) => { log::debug!("MAIN: Writer thread for item B completed successfully."); } Ok(Err(e)) => { return Err(anyhow::anyhow!("Writer thread for item B failed: {}", e)); } Err(panic_payload) => { return Err(anyhow::anyhow!( "Writer thread for item B (ID: {}) panicked: {:?}", ids[1], panic_payload )); } } log::debug!("MAIN: Done waiting on input-writer threads."); // Now that all input has been sent and input pipes will be closed by threads exiting, // wait for the diff child process to terminate. log::debug!("MAIN: Waiting for diff child process to finish..."); let diff_status = child_process .wait() .map_err(|e| anyhow::anyhow!("Failed to wait on diff command: {}", e))?; log::debug!( "MAIN: Diff child process finished with status: {}", diff_status ); let (stdout_capture_result, stderr_capture_result) = execute_diff_command(&mut child_process)?; handle_diff_output(diff_status, stdout_capture_result, stderr_capture_result)?; Ok(()) }