#[cfg(feature = "magic")] use magic::{Cookie, CookieFlags}; #[cfg(not(feature = "magic"))] use std::process::{Command, Stdio}; use std::io::{self, Write}; use std::path::Path; use log::debug; use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData, process_metadata_outputs}; #[cfg(feature = "magic")] #[derive(Debug)] pub struct MagicFileMetaPluginImpl { buffer: Vec, max_buffer_size: usize, is_finalized: bool, cookie: Option, base: BaseMetaPlugin, } #[cfg(feature = "magic")] impl MagicFileMetaPluginImpl { pub fn new( options: Option>, outputs: Option>, ) -> MagicFileMetaPluginImpl { let mut base = BaseMetaPlugin::new(); // Set default outputs let default_outputs = &["mime_type", "mime_encoding", "file_type"]; base.initialize_plugin(default_outputs, &options, &outputs); // Get max_buffer_size from options, default to PIPESIZE let max_buffer_size = base.options .get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(crate::common::PIPESIZE as u64) as usize; MagicFileMetaPluginImpl { buffer: Vec::new(), max_buffer_size, is_finalized: false, cookie: None, base, } } fn get_magic_result(&self, flags: CookieFlags) -> io::Result { if let Some(cookie) = &self.cookie { cookie.set_flags(flags) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; let result = cookie.buffer(&self.buffer) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; // Clean up the result - remove extra whitespace let trimmed = result.trim().to_string(); Ok(trimmed) } else { Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")) } } fn process_magic_types(&self) -> Vec { let mut metadata = Vec::new(); let types_to_process = [ ("mime_type", CookieFlags::MIME_TYPE), ("mime_encoding", CookieFlags::MIME_ENCODING), ("file_type", CookieFlags::empty()), ]; for (name, flags) in types_to_process.iter() { if let Ok(result) = self.get_magic_result(*flags) { if !result.is_empty() { if let Some(meta_data) = process_metadata_outputs( name, serde_yaml::Value::String(result), self.base.outputs(), ) { metadata.push(meta_data); } } } } metadata } } #[cfg(feature = "magic")] impl MetaPlugin for MagicFileMetaPluginImpl { fn is_finalized(&self) -> bool { self.is_finalized } fn set_finalized(&mut self, finalized: bool) { self.is_finalized = finalized; } fn initialize(&mut self) -> MetaPluginResponse { let cookie = match Cookie::open(CookieFlags::default()) { Ok(cookie) => cookie, Err(e) => { debug!("META: MagicFile plugin: failed to create cookie: {}", e); return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } }; if let Err(e) = cookie.load(&[] as &[&Path]) { debug!("META: MagicFile plugin: failed to load magic database: {}", e); return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } self.cookie = Some(cookie); MetaPluginResponse { metadata: Vec::new(), is_finalized: false, } } fn update(&mut self, data: &[u8]) -> MetaPluginResponse { if self.is_finalized { return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); if remaining_capacity > 0 { let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); self.buffer.extend_from_slice(&data[..bytes_to_copy]); if self.buffer.len() >= self.max_buffer_size { let metadata = self.process_magic_types(); self.is_finalized = true; return MetaPluginResponse { metadata, is_finalized: true, }; } } MetaPluginResponse { metadata: Vec::new(), is_finalized: false, } } fn finalize(&mut self) -> MetaPluginResponse { if self.is_finalized { return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } let metadata = self.process_magic_types(); self.is_finalized = true; MetaPluginResponse { metadata, is_finalized: true, } } fn meta_type(&self) -> MetaPluginType { MetaPluginType::MagicFile } fn outputs(&self) -> &std::collections::HashMap { self.base.outputs() } fn outputs_mut(&mut self) -> &mut std::collections::HashMap { self.base.outputs_mut() } fn default_outputs(&self) -> Vec { vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] } fn options(&self) -> &std::collections::HashMap { self.base.options() } fn options_mut(&mut self) -> &mut std::collections::HashMap { self.base.options_mut() } } #[cfg(not(feature = "magic"))] #[derive(Debug)] pub struct FallbackMagicFileMetaPlugin { buffer: Vec, max_buffer_size: usize, is_finalized: bool, base: BaseMetaPlugin, } #[cfg(not(feature = "magic"))] impl FallbackMagicFileMetaPlugin { pub fn new( options: Option>, outputs: Option>, ) -> FallbackMagicFileMetaPlugin { let mut base = BaseMetaPlugin::new(); // Set default outputs let default_outputs = &["mime_type", "mime_encoding", "file_type"]; base.initialize_plugin(default_outputs, &options, &outputs); // Get max_buffer_size from options, default to PIPESIZE let max_buffer_size = base.options .get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(crate::common::PIPESIZE as u64) as usize; FallbackMagicFileMetaPlugin { buffer: Vec::new(), max_buffer_size, is_finalized: false, base, } } fn run_file_command(&self, buffer: &[u8]) -> io::Result { let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.as_ref().write_all(buffer)?; let output = Command::new("file") .arg("-b") .arg("-m") .arg("all") .arg(temp_file.path()) .output() .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to run file command: {}", e)))?; if !output.status.success() { return Err(io::Error::new(io::ErrorKind::Other, "File command failed")); } let result = String::from_utf8_lossy(&output.stdout).trim().to_string(); Ok(result) } fn process_file_output(&self, result: &str) -> Vec { let mut metadata = Vec::new(); // Parse the file command output // file -m all output format is typically: type; charset=encoding let parts: Vec<&str> = result.split(';').map(|s| s.trim()).collect(); let file_type = parts.first().cloned().unwrap_or(result); let mime_encoding = parts.get(1) .and_then(|s| s.strip_prefix("charset=")) .cloned() .unwrap_or(""); // For mime_type, try to infer from file type or use a heuristic let mime_type = if file_type.starts_with("text") { "text/plain" } else if file_type.contains("ASCII") || file_type.contains("UTF-8") { "text/plain" } else if file_type.contains("empty") { "application/octet-stream" } else { "application/octet-stream" // default }; let outputs_to_process = [ ("mime_type", mime_type), ("mime_encoding", mime_encoding), ("file_type", file_type), ]; for (name, value) in outputs_to_process.iter() { if let Some(meta_data) = process_metadata_outputs( name, serde_yaml::Value::String(value.to_string()), self.base.outputs(), ) { metadata.push(meta_data); } } metadata } } #[cfg(not(feature = "magic"))] impl MetaPlugin for FallbackMagicFileMetaPlugin { fn is_finalized(&self) -> bool { self.is_finalized } fn set_finalized(&mut self, finalized: bool) { self.is_finalized = finalized; } fn initialize(&mut self) -> MetaPluginResponse { // No initialization needed for fallback MetaPluginResponse { metadata: Vec::new(), is_finalized: false, } } fn update(&mut self, data: &[u8]) -> MetaPluginResponse { if self.is_finalized { return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); if remaining_capacity > 0 { let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); self.buffer.extend_from_slice(&data[..bytes_to_copy]); if self.buffer.len() >= self.max_buffer_size { if let Ok(result) = self.run_file_command(&self.buffer) { let metadata = self.process_file_output(&result); self.is_finalized = true; return MetaPluginResponse { metadata, is_finalized: true, }; } else { // On error, finalize with empty metadata self.is_finalized = true; return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } } } MetaPluginResponse { metadata: Vec::new(), is_finalized: false, } } fn finalize(&mut self) -> MetaPluginResponse { if self.is_finalized { return MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } let metadata = if !self.buffer.is_empty() { if let Ok(result) = self.run_file_command(&self.buffer) { self.process_file_output(&result) } else { Vec::new() } } else { Vec::new() }; self.is_finalized = true; MetaPluginResponse { metadata, is_finalized: true, } } fn meta_type(&self) -> MetaPluginType { MetaPluginType::MagicFile } fn outputs(&self) -> &std::collections::HashMap { self.base.outputs() } fn outputs_mut(&mut self) -> &mut std::collections::HashMap { self.base.outputs_mut() } fn default_outputs(&self) -> Vec { vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] } fn options(&self) -> &std::collections::HashMap { self.base.options() } fn options_mut(&mut self) -> &mut std::collections::HashMap { self.base.options_mut() } } #[cfg(feature = "magic")] pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin; #[cfg(not(feature = "magic"))] pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin; use crate::meta_plugin::register_meta_plugin; #[ctor::ctor] fn register_magic_file_plugin() { register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| { Box::new(MagicFileMetaPlugin::new(options, outputs)) }); }