From c38ae0d4a9597b0c0ea3d53da816ef111c1ee327 Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Mon, 18 Aug 2025 07:56:56 -0300 Subject: [PATCH] feat: add max_buffer_size to MagicFileMetaPlugin and refactor MetaPluginProgram Co-authored-by: aider (openai/andrew/openrouter/anthropic/claude-sonnet-4) --- src/meta_plugin/magic.rs | 88 +++++++++++++++++++++----------------- src/meta_plugin/program.rs | 83 ++++++++++++++++++----------------- 2 files changed, 91 insertions(+), 80 deletions(-) diff --git a/src/meta_plugin/magic.rs b/src/meta_plugin/magic.rs index 38c9f6d..5c134a8 100644 --- a/src/meta_plugin/magic.rs +++ b/src/meta_plugin/magic.rs @@ -1,52 +1,56 @@ use anyhow::Result; use magic::{Cookie, CookieFlags}; use std::io; -use std::io::Write; use rusqlite::Connection; use crate::meta_plugin::MetaPlugin; -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct MagicFileMetaPlugin { buffer: Vec, + max_buffer_size: usize, is_saved: bool, item_id: Option, conn: Option<*mut Connection>, + cookie: Option, } impl MagicFileMetaPlugin { pub fn new() -> MagicFileMetaPlugin { MagicFileMetaPlugin { buffer: Vec::new(), + max_buffer_size: 4096, // Same as BinaryMetaPlugin is_saved: false, item_id: None, conn: None, + cookie: None, } } fn get_magic_result(&self, flags: CookieFlags) -> io::Result { - let cookie = Cookie::open(flags) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to open magic cookie: {}", e)))?; + if let Some(ref cookie) = self.cookie { + let result = cookie.buffer(&self.buffer) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; - cookie.load(&[] as &[&str]) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to load magic database: {}", e)))?; + // Clean up the result - remove extra whitespace and take first part if needed + let trimmed = result.trim(); + + // For some magic results, we might want just the first part before semicolon or comma + let cleaned = if trimmed.contains(';') { + trimmed.split(';').next().unwrap_or(trimmed).trim() + } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { + trimmed.split(',').next().unwrap_or(trimmed).trim() + } else { + trimmed + }; - let result = cookie.buffer(&self.buffer) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; - - // Clean up the result - remove extra whitespace and take first part if needed - let trimmed = result.trim(); - - // For some magic results, we might want just the first part before semicolon or comma - let cleaned = if trimmed.contains(';') { - trimmed.split(';').next().unwrap_or(trimmed).trim() - } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { - trimmed.split(',').next().unwrap_or(trimmed).trim() + Ok(cleaned.to_string()) } else { - trimmed - }; - - Ok(cleaned.to_string()) + Err(io::Error::new( + io::ErrorKind::Other, + "Magic cookie not initialized".to_string(), + )) + } } fn save_all_magic_metadata(&mut self) -> Result<()> { @@ -94,14 +98,17 @@ impl MetaPlugin for MagicFileMetaPlugin { true } - fn create(&self) -> Result> { - // For meta plugins, we don't actually create a writer since we're buffering data internally - Ok(Box::new(DummyWriter)) - } - fn initialize(&mut self, conn: &Connection, item_id: i64) -> Result<()> { self.item_id = Some(item_id); self.conn = Some(conn as *const Connection as *mut Connection); + + // Initialize magic cookie + let cookie = Cookie::open(CookieFlags::empty()) + .map_err(|e| anyhow::anyhow!("Failed to open magic cookie: {}", e))?; + cookie.load(&[] as &[&str]) + .map_err(|e| anyhow::anyhow!("Failed to load magic database: {}", e))?; + self.cookie = Some(cookie); + Ok(()) } @@ -117,7 +124,21 @@ impl MetaPlugin for MagicFileMetaPlugin { } fn update(&mut self, data: &[u8]) { - self.buffer.extend_from_slice(data); + // Only collect up to max_buffer_size + let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); + if remaining_capacity > 0 { + let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); + self.buffer.extend_from_slice(&data[..bytes_to_copy]); + + // Check if we've reached our buffer limit and save if so + if self.buffer.len() >= self.max_buffer_size && !self.is_saved { + if let (Some(conn), Some(item_id)) = (self.conn, self.item_id) { + if let Err(e) = self.save_all_magic_metadata() { + eprintln!("Warning: Failed to save magic metadata early: {}", e); + } + } + } + } } fn meta_name(&mut self) -> String { @@ -125,16 +146,3 @@ impl MetaPlugin for MagicFileMetaPlugin { } } -// Dummy writer that implements Write but doesn't do anything -// This is needed to satisfy the MetaPlugin trait requirements -struct DummyWriter; - -impl Write for DummyWriter { - fn write(&mut self, buf: &[u8]) -> io::Result { - Ok(buf.len()) - } - - fn flush(&mut self) -> io::Result<()> { - Ok(()) - } -} diff --git a/src/meta_plugin/program.rs b/src/meta_plugin/program.rs index 0fd6dde..750ac60 100644 --- a/src/meta_plugin/program.rs +++ b/src/meta_plugin/program.rs @@ -1,21 +1,21 @@ -use crate::plugins::ProgramWriter; use anyhow::{Context, Result, anyhow}; use log::*; use std::io; use std::io::Write; -use std::process::{Command, Stdio}; +use std::process::{Command, Stdio, Child}; use which::which; use crate::meta_plugin::MetaPlugin; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct MetaPluginProgram { pub program: String, pub args: Vec, pub supported: bool, pub meta_name: String, pub split_whitespace: bool, - buffer: Vec, + process: Option, + writer: Option>, } impl MetaPluginProgram { @@ -29,7 +29,8 @@ impl MetaPluginProgram { supported, meta_name, split_whitespace, - buffer: Vec::new(), + process: None, + writer: None, } } } @@ -43,8 +44,8 @@ impl MetaPlugin for MetaPluginProgram { false } - fn create(&self) -> Result> { - debug!("META: Writing using {:?}", *self); + fn initialize(&mut self, _conn: &rusqlite::Connection, _item_id: i64) -> Result<()> { + debug!("META: Initializing program plugin: {:?}", self); let program = self.program.clone(); let args = self.args.clone(); @@ -55,6 +56,7 @@ impl MetaPlugin for MetaPluginProgram { .args(args.clone()) .stdin(Stdio::piped()) .stdout(Stdio::piped()) + .stderr(Stdio::piped()) .spawn() .context(anyhow!( "Problem spawning child process: {:?} {:?}", @@ -62,58 +64,59 @@ impl MetaPlugin for MetaPluginProgram { args ))?; - Ok(Box::new(ProgramWriter { - stdin: process.stdin.take().unwrap(), - })) + let stdin = process.stdin.take().unwrap(); + self.writer = Some(Box::new(stdin)); + self.process = Some(process); + + Ok(()) } fn finalize(&mut self) -> io::Result { - let program = self.program.clone(); - let args = self.args.clone(); + debug!("META: Finalizing program plugin"); - debug!("META: Executing command for finalize: {:?} {:?}", program, args); + // Close stdin to signal EOF to the process + self.writer.take(); - let mut process = Command::new(program) - .args(args) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to spawn process: {}", e)))?; + if let Some(mut process) = self.process.take() { + let output = process.wait_with_output() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait for process: {}", e)))?; - let stdin = process.stdin.as_mut().unwrap(); - stdin.write_all(&self.buffer) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to write to stdin: {}", e)))?; - - let output = process.wait_with_output() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait for process: {}", e)))?; - - if output.status.success() { - let stdout = String::from_utf8_lossy(&output.stdout); - let trimmed_result = stdout.trim(); - - // For certain programs, we only want the first part before whitespace - if self.split_whitespace { - let parts: Vec<&str> = trimmed_result.split_whitespace().collect(); - if !parts.is_empty() { - Ok(parts[0].to_string()) + if output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + let trimmed_result = stdout.trim(); + + // For certain programs, we only want the first part before whitespace + if self.split_whitespace { + let parts: Vec<&str> = trimmed_result.split_whitespace().collect(); + if !parts.is_empty() { + Ok(parts[0].to_string()) + } else { + Ok(trimmed_result.to_string()) + } } else { Ok(trimmed_result.to_string()) } } else { - Ok(trimmed_result.to_string()) + let stderr = String::from_utf8_lossy(&output.stderr); + Err(io::Error::new( + io::ErrorKind::Other, + format!("Command failed: {}", stderr.trim()), + )) } } else { - let stderr = String::from_utf8_lossy(&output.stderr); Err(io::Error::new( io::ErrorKind::Other, - format!("Command failed: {}", stderr.trim()), + "No process to finalize".to_string(), )) } } fn update(&mut self, data: &[u8]) { - self.buffer.extend_from_slice(data); + if let Some(ref mut writer) = self.writer { + if let Err(e) = writer.write_all(data) { + debug!("META: Failed to write to process stdin: {}", e); + } + } } fn meta_name(&mut self) -> String {