From b538e2f8c1d401021516e7655bb1395fa9cbeefd Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Fri, 12 Sep 2025 12:29:33 -0300 Subject: [PATCH] feat: add magic file plugin with fallback to file command Co-authored-by: aider (openai/andrew/openrouter/sonoma-sky-alpha) --- src/meta_plugin/magic_file.rs | 419 +++++++++++++++++++++++++++++++++- 1 file changed, 418 insertions(+), 1 deletion(-) diff --git a/src/meta_plugin/magic_file.rs b/src/meta_plugin/magic_file.rs index e343aa2..7e87fd3 100644 --- a/src/meta_plugin/magic_file.rs +++ b/src/meta_plugin/magic_file.rs @@ -1,4 +1,421 @@ #[cfg(feature = "magic")] use magic::{Cookie, CookieFlags}; #[cfg(not(feature = "magic"))] -use std::process::{Command \ No newline at end of file +use std::process::{Command, Stdio}; + +use std::io::{self, Write}; +use log::debug; + +use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData, process_metadata_outputs}; + +#[cfg(feature = "magic")] +#[derive(Debug)] +pub struct MagicFileMetaPlugin { + buffer: Vec, + max_buffer_size: usize, + is_finalized: bool, + cookie: Option, + base: BaseMetaPlugin, +} + +#[cfg(feature = "magic")] +impl MagicFileMetaPlugin { + pub fn new( + options: Option>, + outputs: Option>, + ) -> MagicFileMetaPlugin { + let mut base = BaseMetaPlugin::new(); + + // Set default outputs + let default_outputs = &["mime_type", "mime_encoding", "file_type"]; + base.initialize_plugin(default_outputs, &options, &outputs); + + // Get max_buffer_size from options, default to PIPESIZE + let max_buffer_size = base.options + .get("max_buffer_size") + .and_then(|v| v.as_u64()) + .unwrap_or(crate::common::PIPESIZE as u64) as usize; + + MagicFileMetaPlugin { + buffer: Vec::new(), + max_buffer_size, + is_finalized: false, + cookie: None, + base, + } + } + + fn get_magic_result(&self, flags: CookieFlags) -> io::Result { + if let Some(cookie) = &self.cookie { + cookie.set_flags(flags) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; + + let result = cookie.buffer(&self.buffer) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; + + // Clean up the result - remove extra whitespace + let trimmed = result.trim().to_string(); + + Ok(trimmed) + } else { + Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")) + } + } + + fn process_magic_types(&self) -> Vec { + let mut metadata = Vec::new(); + + let types_to_process = [ + ("mime_type", CookieFlags::MIME_TYPE), + ("mime_encoding", CookieFlags::MIME_ENCODING), + ("file_type", CookieFlags::NONE), + ]; + + for (name, flags) in types_to_process.iter() { + if let Ok(result) = self.get_magic_result(*flags) { + if !result.is_empty() { + if let Some(meta_data) = process_metadata_outputs( + name, + serde_yaml::Value::String(result), + self.base.outputs(), + ) { + metadata.push(meta_data); + } + } + } + } + + metadata + } +} + +#[cfg(feature = "magic")] +impl MetaPlugin for MagicFileMetaPlugin { + fn is_finalized(&self) -> bool { + self.is_finalized + } + + fn set_finalized(&mut self, finalized: bool) { + self.is_finalized = finalized; + } + + fn initialize(&mut self) -> MetaPluginResponse { + let cookie = match Cookie::open(CookieFlags::default()) { + Ok(cookie) => cookie, + Err(e) => { + debug!("META: MagicFile plugin: failed to create cookie: {}", e); + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + }; + + if let Err(e) = cookie.load(&[]) { + debug!("META: MagicFile plugin: failed to load magic database: {}", e); + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + + self.cookie = Some(cookie); + + MetaPluginResponse { + metadata: Vec::new(), + is_finalized: false, + } + } + + fn update(&mut self, data: &[u8]) -> MetaPluginResponse { + if self.is_finalized { + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + + let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); + if remaining_capacity > 0 { + let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); + self.buffer.extend_from_slice(&data[..bytes_to_copy]); + + if self.buffer.len() >= self.max_buffer_size { + let metadata = self.process_magic_types(); + self.is_finalized = true; + return MetaPluginResponse { + metadata, + is_finalized: true, + }; + } + } + + MetaPluginResponse { + metadata: Vec::new(), + is_finalized: false, + } + } + + fn finalize(&mut self) -> MetaPluginResponse { + if self.is_finalized { + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + + let metadata = self.process_magic_types(); + self.is_finalized = true; + + MetaPluginResponse { + metadata, + is_finalized: true, + } + } + + fn meta_type(&self) -> MetaPluginType { + MetaPluginType::MagicFile + } + + fn outputs(&self) -> &std::collections::HashMap { + self.base.outputs() + } + + fn outputs_mut(&mut self) -> &mut std::collections::HashMap { + self.base.outputs_mut() + } + + fn default_outputs(&self) -> Vec { + vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] + } + + fn options(&self) -> &std::collections::HashMap { + self.base.options() + } + + fn options_mut(&mut self) -> &mut std::collections::HashMap { + self.base.options_mut() + } +} + +#[cfg(not(feature = "magic"))] +#[derive(Debug)] +pub struct FallbackMagicFileMetaPlugin { + buffer: Vec, + max_buffer_size: usize, + is_finalized: bool, + base: BaseMetaPlugin, +} + +#[cfg(not(feature = "magic"))] +impl FallbackMagicFileMetaPlugin { + pub fn new( + options: Option>, + outputs: Option>, + ) -> FallbackMagicFileMetaPlugin { + let mut base = BaseMetaPlugin::new(); + + // Set default outputs + let default_outputs = &["mime_type", "mime_encoding", "file_type"]; + base.initialize_plugin(default_outputs, &options, &outputs); + + // Get max_buffer_size from options, default to PIPESIZE + let max_buffer_size = base.options + .get("max_buffer_size") + .and_then(|v| v.as_u64()) + .unwrap_or(crate::common::PIPESIZE as u64) as usize; + + FallbackMagicFileMetaPlugin { + buffer: Vec::new(), + max_buffer_size, + is_finalized: false, + base, + } + } + + fn run_file_command(&self, buffer: &[u8]) -> io::Result { + let mut temp_file = tempfile::NamedTempFile::new()?; + temp_file.as_ref().write_all(buffer)?; + + let output = Command::new("file") + .arg("-b") + .arg("-m") + .arg("all") + .arg(temp_file.path()) + .output() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to run file command: {}", e)))?; + + if !output.status.success() { + return Err(io::Error::new(io::ErrorKind::Other, "File command failed")); + } + + let result = String::from_utf8_lossy(&output.stdout).trim().to_string(); + Ok(result) + } + + fn process_file_output(&self, result: &str) -> Vec { + let mut metadata = Vec::new(); + + // Parse the file command output + // file -m all output format is typically: type; charset=encoding + let parts: Vec<&str> = result.split(';').map(|s| s.trim()).collect(); + let file_type = parts.first().cloned().unwrap_or(result); + let mime_encoding = parts.get(1) + .and_then(|s| s.strip_prefix("charset=")) + .cloned() + .unwrap_or(""); + + // For mime_type, try to infer from file type or use a heuristic + let mime_type = if file_type.starts_with("text") { + "text/plain" + } else if file_type.contains("ASCII") || file_type.contains("UTF-8") { + "text/plain" + } else if file_type.contains("empty") { + "application/octet-stream" + } else { + "application/octet-stream" // default + }; + + let outputs_to_process = [ + ("mime_type", mime_type), + ("mime_encoding", mime_encoding), + ("file_type", file_type), + ]; + + for (name, value) in outputs_to_process.iter() { + if let Some(meta_data) = process_metadata_outputs( + name, + serde_yaml::Value::String(value.to_string()), + self.base.outputs(), + ) { + metadata.push(meta_data); + } + } + + metadata + } +} + +#[cfg(not(feature = "magic"))] +impl MetaPlugin for FallbackMagicFileMetaPlugin { + fn is_finalized(&self) -> bool { + self.is_finalized + } + + fn set_finalized(&mut self, finalized: bool) { + self.is_finalized = finalized; + } + + fn initialize(&mut self) -> MetaPluginResponse { + // No initialization needed for fallback + MetaPluginResponse { + metadata: Vec::new(), + is_finalized: false, + } + } + + fn update(&mut self, data: &[u8]) -> MetaPluginResponse { + if self.is_finalized { + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + + let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); + if remaining_capacity > 0 { + let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); + self.buffer.extend_from_slice(&data[..bytes_to_copy]); + + if self.buffer.len() >= self.max_buffer_size { + if let Ok(result) = self.run_file_command(&self.buffer) { + let metadata = self.process_file_output(&result); + self.is_finalized = true; + return MetaPluginResponse { + metadata, + is_finalized: true, + }; + } else { + // On error, finalize with empty metadata + self.is_finalized = true; + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + } + } + + MetaPluginResponse { + metadata: Vec::new(), + is_finalized: false, + } + } + + fn finalize(&mut self) -> MetaPluginResponse { + if self.is_finalized { + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; + } + + let metadata = if !self.buffer.is_empty() { + if let Ok(result) = self.run_file_command(&self.buffer) { + self.process_file_output(&result) + } else { + Vec::new() + } + } else { + Vec::new() + }; + + self.is_finalized = true; + + MetaPluginResponse { + metadata, + is_finalized: true, + } + } + + fn meta_type(&self) -> MetaPluginType { + MetaPluginType::MagicFile + } + + fn outputs(&self) -> &std::collections::HashMap { + self.base.outputs() + } + + fn outputs_mut(&mut self) -> &mut std::collections::HashMap { + self.base.outputs_mut() + } + + fn default_outputs(&self) -> Vec { + vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] + } + + fn options(&self) -> &std::collections::HashMap { + self.base.options() + } + + fn options_mut(&mut self) -> &mut std::collections::HashMap { + self.base.options_mut() + } +} + +#[cfg(feature = "magic")] +use MagicFileMetaPlugin as MagicFileMetaPluginImpl; + +#[cfg(not(feature = "magic"))] +use FallbackMagicFileMetaPlugin as MagicFileMetaPluginImpl; + +pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin; + +use crate::meta_plugin::register_meta_plugin; + +#[ctor::ctor] +fn register_magic_file_plugin() { + register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| { + Box::new(MagicFileMetaPlugin::new(options, outputs)) + }); +}